diff --git a/.nojekyll b/.nojekyll index e69de29..e7ffbcc 100644 --- a/.nojekyll +++ b/.nojekyll @@ -0,0 +1 @@ +22bb4d61 \ No newline at end of file diff --git a/_static/app-ui.png b/_static/app-ui.png index 4ec08c4..408855d 100644 Binary files a/_static/app-ui.png and b/_static/app-ui.png differ diff --git a/index.html b/index.html index 2ed2699..ce6951b 100644 --- a/index.html +++ b/index.html @@ -2,7 +2,7 @@ - + @@ -47,7 +47,13 @@ "collapse-after": 3, "panel-placement": "end", "type": "overlay", - "limit": 20, + "limit": 50, + "keyboard-shortcut": [ + "f", + "/", + "s" + ], + "show-item-context": false, "language": { "search-no-results-text": "No results", "search-matching-documents-text": "matching documents", @@ -56,6 +62,7 @@ "search-more-match-text": "more match in this document", "search-more-matches-text": "more matches in this document", "search-clear-button-title": "Clear", + "search-text-placeholder": "", "search-detached-cancel-button-title": "Cancel", "search-submit-button-title": "Submit", "search-label": "Search" @@ -69,47 +76,47 @@
-
@@ -121,23 +128,11 @@
-
-
-

SIC-SOC-LLM

-
- -
- - - - -
- - -
+
+

SIC-SOC-LLM

Repository status Code stability MacOS codecov

@@ -195,7 +190,7 @@

Usage

Configuration

The sic-soc-llm package uses a configuration file in TOML format to specify the paths to the data files and the names of the models to use. An example configuration file is provided in sic_soc_llm_config.toml and is read by the get_config function. The following fields are required:

- +
@@ -308,6 +303,7 @@

Data Science Campus

+ @@ -354,6 +350,33 @@

Data Science Campus { + const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light'; + const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark'; + let newTheme = ''; + if(darkModeDefault) { + newTheme = isAlternate ? baseTheme : alternateTheme; + } else { + newTheme = isAlternate ? alternateTheme : baseTheme; + } + const changeGiscusTheme = () => { + // From: https://github.com/giscus/giscus/issues/336 + const sendMessage = (message) => { + const iframe = document.querySelector('iframe.giscus-frame'); + if (!iframe) return; + iframe.contentWindow.postMessage({ giscus: message }, 'https://giscus.app'); + } + sendMessage({ + setConfig: { + theme: newTheme + } + }); + } + const isGiscussLoaded = window.document.querySelector('iframe.giscus-frame') !== null; + if (isGiscussLoaded) { + changeGiscusTheme(); + } + } const toggleColorMode = (alternate) => { // Switch the stylesheets const alternateStylesheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate'); @@ -420,13 +443,15 @@

Data Science Campus { // Read the current dark / light value let toAlternate = !hasAlternateSentinel(); toggleColorMode(toAlternate); setStyleSentinel(toAlternate); + toggleGiscusIfUsed(toAlternate, darkModeDefault); }; // Ensure there is a toggle, if there isn't float one in the top right if (window.document.querySelector('.quarto-color-scheme-toggle') === null) { @@ -461,18 +486,7 @@

Data Science CampusData Science Campus { + return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href); + } + // Inspect non-navigation links and adorn them if external + var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)'); + for (var i=0; iData Science CampusData Science Campus { + // Strip column container classes + const stripColumnClz = (el) => { + el.classList.remove("page-full", "page-columns"); + if (el.children) { + for (const child of el.children) { + stripColumnClz(child); + } + } + } + stripColumnClz(note) + if (id === null || id.startsWith('sec-')) { + // Special case sections, only their first couple elements + const container = document.createElement("div"); + if (note.children && note.children.length > 2) { + container.appendChild(note.children[0].cloneNode(true)); + for (let i = 1; i < note.children.length; i++) { + const child = note.children[i]; + if (child.tagName === "P" && child.innerText === "") { + continue; + } else { + container.appendChild(child.cloneNode(true)); + break; + } + } + if (window.Quarto?.typesetMath) { + window.Quarto.typesetMath(container); + } + return container.innerHTML + } else { + if (window.Quarto?.typesetMath) { + window.Quarto.typesetMath(note); + } + return note.innerHTML; + } + } else { + // Remove any anchor links if they are present + const anchorLink = note.querySelector('a.anchorjs-link'); + if (anchorLink) { + anchorLink.remove(); + } + if (window.Quarto?.typesetMath) { + window.Quarto.typesetMath(note); + } + // TODO in 1.5, we should make sure this works without a callout special case + if (note.classList.contains("callout")) { + return note.outerHTML; + } else { + return note.innerHTML; + } + } + } + for (var i=0; i res.text()) + .then(html => { + const parser = new DOMParser(); + const htmlDoc = parser.parseFromString(html, "text/html"); + const note = htmlDoc.getElementById(id); + if (note !== null) { + const html = processXRef(id, note); + instance.setContent(html); + } + }).finally(() => { + instance.enable(); + instance.show(); + }); + } + } else { + // See if we can fetch a full url (with no hash to target) + // This is a special case and we should probably do some content thinning / targeting + fetch(url) + .then(res => res.text()) + .then(html => { + const parser = new DOMParser(); + const htmlDoc = parser.parseFromString(html, "text/html"); + const note = htmlDoc.querySelector('main.content'); + if (note !== null) { + // This should only happen for chapter cross references + // (since there is no id in the URL) + // remove the first header + if (note.children.length > 0 && note.children[0].tagName === "HEADER") { + note.children[0].remove(); + } + const html = processXRef(null, note); + instance.setContent(html); + } + }).finally(() => { + instance.enable(); + instance.show(); + }); + } + }, function(instance) { }); } let selectedAnnoteEl; @@ -575,6 +760,7 @@

Data Science CampusData Science Campus { + elRect = undefined; + if (selectedAnnoteEl) { + selectCodeLines(selectedAnnoteEl); + } + }, 10) + ); + function throttle(fn, ms) { + let throttle = false; + let timer; + return (...args) => { + if(!throttle) { // first call gets through + fn.apply(this, args); + throttle = true; + } else { // all the others get throttled + if(timer) clearTimeout(timer); // cancel #2 + timer = setTimeout(() => { + fn.apply(this, args); + timer = throttle = false; + }, ms); + } + }; + } // Attach click handler to the DT const annoteDls = window.document.querySelectorAll('dt[data-target-cell]'); for (const annoteDlNode of annoteDls) { @@ -662,8 +874,12 @@

Data Science Campus
+ +

Demonstration notebook for the SIC data structure.

-
-
+
+
Code: Import methods and initialise
import random
 
@@ -190,8 +199,8 @@ 

1. SIC data structure

There are two additional datasets required for the SIC hierarchy object that are not part of the repository. These are the SIC structure and SIC index datasets. The following code will download these datasets from the ONS website if they are not already available.

-
-
+
+
Code: Make sure all required SIC datasets are available
import requests
 from pathlib import Path
@@ -216,8 +225,8 @@ 

1. SIC data structure

Load SIC index

-
-
+
+
Code: Load SIC index
sic_index_filepath = config["lookups"]["sic_index"]
 sic_index_df = sic_data_access.load_sic_index(sic_index_filepath)
@@ -228,7 +237,7 @@ 

Load SIC index

-

+
@@ -271,8 +280,8 @@

Load SIC index

Load SIC structure

-
-
+
+
Code: Load SIC structure
sic_structure_filepath = config["lookups"]["sic_structure"]
 sic_df = sic_data_access.load_sic_structure(sic_structure_filepath)
@@ -283,7 +292,7 @@ 

Load SIC structure

-
+
@@ -338,8 +347,8 @@

Load SIC structure

Create SIC hierarchy

-
-
+
+
Code: Create SIC hierarchy
sic = sic_hierarchy.load_hierarchy(sic_df, sic_index_df)
 
@@ -353,8 +362,8 @@ 

Create SIC hierarchy<

Example lookup

Supports a variety of common formatting patterns for SIC. Sometimes 4-digit SIC serve as 5-digit SIC

-
-
+
+
Code: Example lookup
print(sic["A011xx"])
 print(sic["A011"])
@@ -378,8 +387,8 @@ 

Example lookup

Select a random example

-
-
+
+
Code: Example SIC index entry
random.seed(seed)
 sic_node = random.choice(sic.nodes)
@@ -593,6 +602,33 @@ 

Select a random ex } } } + const toggleGiscusIfUsed = (isAlternate, darkModeDefault) => { + const baseTheme = document.querySelector('#giscus-base-theme')?.value ?? 'light'; + const alternateTheme = document.querySelector('#giscus-alt-theme')?.value ?? 'dark'; + let newTheme = ''; + if(darkModeDefault) { + newTheme = isAlternate ? baseTheme : alternateTheme; + } else { + newTheme = isAlternate ? alternateTheme : baseTheme; + } + const changeGiscusTheme = () => { + // From: https://github.com/giscus/giscus/issues/336 + const sendMessage = (message) => { + const iframe = document.querySelector('iframe.giscus-frame'); + if (!iframe) return; + iframe.contentWindow.postMessage({ giscus: message }, 'https://giscus.app'); + } + sendMessage({ + setConfig: { + theme: newTheme + } + }); + } + const isGiscussLoaded = window.document.querySelector('iframe.giscus-frame') !== null; + if (isGiscussLoaded) { + changeGiscusTheme(); + } + } const toggleColorMode = (alternate) => { // Switch the stylesheets const alternateStylesheets = window.document.querySelectorAll('link.quarto-color-scheme.quarto-color-alternate'); @@ -659,13 +695,15 @@

Select a random ex return localAlternateSentinel; } } - let localAlternateSentinel = 'alternate'; + const darkModeDefault = true; + let localAlternateSentinel = darkModeDefault ? 'alternate' : 'default'; // Dark / light mode switch window.quartoToggleColorScheme = () => { // Read the current dark / light value let toAlternate = !hasAlternateSentinel(); toggleColorMode(toAlternate); setStyleSentinel(toAlternate); + toggleGiscusIfUsed(toAlternate, darkModeDefault); }; // Ensure there is a toggle, if there isn't float one in the top right if (window.document.querySelector('.quarto-color-scheme-toggle') === null) { @@ -700,18 +738,7 @@

Select a random ex } return false; } - const clipboard = new window.ClipboardJS('.code-copy-button', { - text: function(trigger) { - const codeEl = trigger.previousElementSibling.cloneNode(true); - for (const childEl of codeEl.children) { - if (isCodeAnnotation(childEl)) { - childEl.remove(); - } - } - return codeEl.innerText; - } - }); - clipboard.on('success', function(e) { + const onCopySuccess = function(e) { // button target const button = e.trigger; // don't keep focus @@ -743,11 +770,50 @@

Select a random ex }, 1000); // clear code selection e.clearSelection(); + } + const getTextToCopy = function(trigger) { + const codeEl = trigger.previousElementSibling.cloneNode(true); + for (const childEl of codeEl.children) { + if (isCodeAnnotation(childEl)) { + childEl.remove(); + } + } + return codeEl.innerText; + } + const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', { + text: getTextToCopy }); - function tippyHover(el, contentFn) { + clipboard.on('success', onCopySuccess); + if (window.document.getElementById('quarto-embedded-source-code-modal')) { + // For code content inside modals, clipBoardJS needs to be initialized with a container option + // TODO: Check when it could be a function (https://github.com/zenorocha/clipboard.js/issues/860) + const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', { + text: getTextToCopy, + container: window.document.getElementById('quarto-embedded-source-code-modal') + }); + clipboardModal.on('success', onCopySuccess); + } + var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//); + var mailtoRegex = new RegExp(/^mailto:/); + var filterRegex = new RegExp("https:\/\/datasciencecampus\.github\.io\/sic-soc-llm\/"); + var isInternal = (href) => { + return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href); + } + // Inspect non-navigation links and adorn them if external + var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)'); + for (var i=0; iSelect a random ex interactive: true, interactiveBorder: 10, theme: 'quarto', - placement: 'bottom-start' + placement: 'bottom-start', }; + if (contentFn) { + config.content = contentFn; + } + if (onTriggerFn) { + config.onTrigger = onTriggerFn; + } + if (onUntriggerFn) { + config.onUntrigger = onUntriggerFn; + } window.tippy(el, config); } const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]'); @@ -770,7 +845,130 @@

Select a random ex try { href = new URL(href).hash; } catch {} const id = href.replace(/^#\/?/, ""); const note = window.document.getElementById(id); - return note.innerHTML; + if (note) { + return note.innerHTML; + } else { + return ""; + } + }); + } + const xrefs = window.document.querySelectorAll('a.quarto-xref'); + const processXRef = (id, note) => { + // Strip column container classes + const stripColumnClz = (el) => { + el.classList.remove("page-full", "page-columns"); + if (el.children) { + for (const child of el.children) { + stripColumnClz(child); + } + } + } + stripColumnClz(note) + if (id === null || id.startsWith('sec-')) { + // Special case sections, only their first couple elements + const container = document.createElement("div"); + if (note.children && note.children.length > 2) { + container.appendChild(note.children[0].cloneNode(true)); + for (let i = 1; i < note.children.length; i++) { + const child = note.children[i]; + if (child.tagName === "P" && child.innerText === "") { + continue; + } else { + container.appendChild(child.cloneNode(true)); + break; + } + } + if (window.Quarto?.typesetMath) { + window.Quarto.typesetMath(container); + } + return container.innerHTML + } else { + if (window.Quarto?.typesetMath) { + window.Quarto.typesetMath(note); + } + return note.innerHTML; + } + } else { + // Remove any anchor links if they are present + const anchorLink = note.querySelector('a.anchorjs-link'); + if (anchorLink) { + anchorLink.remove(); + } + if (window.Quarto?.typesetMath) { + window.Quarto.typesetMath(note); + } + // TODO in 1.5, we should make sure this works without a callout special case + if (note.classList.contains("callout")) { + return note.outerHTML; + } else { + return note.innerHTML; + } + } + } + for (var i=0; i res.text()) + .then(html => { + const parser = new DOMParser(); + const htmlDoc = parser.parseFromString(html, "text/html"); + const note = htmlDoc.getElementById(id); + if (note !== null) { + const html = processXRef(id, note); + instance.setContent(html); + } + }).finally(() => { + instance.enable(); + instance.show(); + }); + } + } else { + // See if we can fetch a full url (with no hash to target) + // This is a special case and we should probably do some content thinning / targeting + fetch(url) + .then(res => res.text()) + .then(html => { + const parser = new DOMParser(); + const htmlDoc = parser.parseFromString(html, "text/html"); + const note = htmlDoc.querySelector('main.content'); + if (note !== null) { + // This should only happen for chapter cross references + // (since there is no id in the URL) + // remove the first header + if (note.children.length > 0 && note.children[0].tagName === "HEADER") { + note.children[0].remove(); + } + const html = processXRef(null, note); + instance.setContent(html); + } + }).finally(() => { + instance.enable(); + instance.show(); + }); + } + }, function(instance) { }); } let selectedAnnoteEl; @@ -814,6 +1012,7 @@

Select a random ex } div.style.top = top - 2 + "px"; div.style.height = height + 4 + "px"; + div.style.left = 0; let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter"); if (gutterDiv === null) { gutterDiv = window.document.createElement("div"); @@ -839,6 +1038,32 @@

Select a random ex }); selectedAnnoteEl = undefined; }; + // Handle positioning of the toggle + window.addEventListener( + "resize", + throttle(() => { + elRect = undefined; + if (selectedAnnoteEl) { + selectCodeLines(selectedAnnoteEl); + } + }, 10) + ); + function throttle(fn, ms) { + let throttle = false; + let timer; + return (...args) => { + if(!throttle) { // first call gets through + fn.apply(this, args); + throttle = true; + } else { // all the others get throttled + if(timer) clearTimeout(timer); // cancel #2 + timer = setTimeout(() => { + fn.apply(this, args); + timer = throttle = false; + }, ms); + } + }; + } // Attach click handler to the DT const annoteDls = window.document.querySelectorAll('dt[data-target-cell]'); for (const annoteDlNode of annoteDls) { @@ -901,8 +1126,12 @@

Select a random ex