From a744fc2f3f37d8e0bc6ce0faae76d297346ce0e9 Mon Sep 17 00:00:00 2001
From: Sarah Oberbichler <66369271+soberbichler@users.noreply.github.com>
Date: Mon, 2 Dec 2024 02:34:54 +0100
Subject: [PATCH] Update module_5.html
---
modules/module_5.html | 114 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 114 insertions(+)
diff --git a/modules/module_5.html b/modules/module_5.html
index 8b13789..deba542 100644
--- a/modules/module_5.html
+++ b/modules/module_5.html
@@ -1 +1,115 @@
+
+
+
+
+
Module 4: Large Language Models for Article Extraction and Post-OCR Correction
+
+
This module will be all about Large Language Models, prompting techniques, and two specific NLP tasks: article extraction and OCR post-correction.
+
+ Large Language Models (LLMs) are artificial intelligence systems trained on massive text datasets that process and generate human language based on statistical patterns learned during training. Built on the Transformer architecture introduced by Vaswani et al. in 2017, these models have demonstrated measurable success in tasks such as text completion, translation, and question answering by predicting likely next tokens in a sequence. Recent research has shown that increasing model size and training data generally improves performance on standard benchmarks, with models like GPT-4 achieving over 90% accuracy on many academic and professional tests (though these scores require careful interpretation). While LLMs have proven effective for many language tasks, controlled studies have documented significant limitations, including factual inaccuracies, reflection of training-data bias, and an inability to truly reason: they fundamentally operate through pattern matching rather than genuine understanding.
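The idea of "predicting likely next tokens" can be illustrated with a deliberately tiny sketch. A real LLM uses a neural network over subword tokens; the word-level bigram counts and the toy corpus below are made up purely to show the principle:

```python
from collections import Counter, defaultdict

# Hypothetical toy corpus; real LLMs train on billions of subword tokens.
corpus = "the model predicts the next token and the next token follows".split()

# Count which token follows which (a simple bigram model).
following = defaultdict(Counter)
for current, nxt in zip(corpus, corpus[1:]):
    following[current][nxt] += 1

def predict_next(token):
    """Return the continuation most frequently seen after `token`."""
    return following[token].most_common(1)[0][0]

print(predict_next("the"))  # "next" follows "the" twice, "model" only once
```

The same principle, scaled up from counting bigrams to learning billions of parameters, is what lets an LLM continue a prompt fluently.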
+
+
+
Preparation for Module 5:
+
+ -
+
Read the article listed under literature below and prepare for class discussion:
+
+ - Why are machine learning methods called "Black Boxes"?
+ - What does XAI stand for?
+ - What is a self-attention mechanism?
+ - Name a few methods to look into the "Black Box"
+ - Create at least one more entry in the Glossary
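For the self-attention question above, a minimal numeric sketch may help: each token scores every other token with a scaled dot product, and a softmax turns those scores into weights. The 2-dimensional token vectors here are invented for illustration, and the learned query/key/value projections of a real Transformer are omitted:

```python
import math

# Hypothetical 2-dimensional "embeddings" for three tokens; real models
# use hundreds of dimensions and learned query/key/value projections.
tokens = {"cat": [1.0, 0.0], "sat": [0.7, 0.7], "mat": [0.0, 1.0]}

def dot(a, b):
    return sum(x * y for x, y in zip(a, b))

def softmax(scores):
    exps = [math.exp(s) for s in scores]
    total = sum(exps)
    return [e / total for e in exps]

def attention_weights(query_token):
    """How strongly `query_token` attends to each token (itself included)."""
    q = tokens[query_token]
    # Scaled dot-product scores, as in "Attention Is All You Need".
    scores = [dot(q, k) / math.sqrt(len(q)) for k in tokens.values()]
    return dict(zip(tokens, softmax(scores)))

# The weights form a probability distribution over all tokens.
print(attention_weights("sat"))
```

Because the weights sum to one, each token's output becomes a weighted mixture of all token representations — which is also why attention maps are a popular window into the "Black Box".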
+
+
+
+
+
Literature:
+
+ Dobson, J.E. On reading and interpreting black box deep neural networks. Int J Digit Humanities 5, 431–449 (2023). https://doi.org/10.1007/s42803-023-00075-w
+
+
+
+
Notebooks we will use in class:
+
+
Download via the DDB API
+
+
+
+
+
+
+
Introduction to Transformers: What Can They Do?
+
+
+
+
+
+
Transformers and Semantic Search
+
+
+
+
+
+
+
Workload (after class):
+
+ -
+
Try the semantic search for your own research question:
+
+ - Can you find new relevant keywords/articles?
+
+
+
+
+
+
Date and Time:
+
December 6, 2024 (10:00 AM to 11:30 AM)
+
+
+
+
+