
Commit

update page
btyu committed Feb 14, 2024
1 parent 07aa8ce commit dab3c33
Showing 2 changed files with 121 additions and 70 deletions.
172 changes: 111 additions & 61 deletions index.html
@@ -11,7 +11,6 @@
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://kit.fontawesome.com/f8ddf9854a.js" crossorigin="anonymous"></script>
<meta charset="utf-8">
<meta http-equiv="refresh" content="60"> <!-- TODO remove before production -->
<meta name="description" content="TODO">
<meta name="keywords"
content="Organic Chemistry, Small Molecules, Chemistry, Large Language Model, Large Multimodal Model, artificial intelligence, AI">
@@ -101,7 +100,8 @@
<div class="columns is-centered">
<div class="column has-text-centered">
<h1 class="title is-1 publication-title is-bold">
<img src="static/images/llasmol.svg" style="width:1em;vertical-align: middle" alt="Logo" />
<img src="static/images/llasmol.svg" style="width:2em;vertical-align: middle" alt="Logo" />
<p></p>
<span class="mmmu" style="vertical-align: middle">LlaSMol</span>
</h1>
<h2 class="subtitle is-3 publication-subtitle">
@@ -153,7 +153,7 @@ <h2 class="subtitle is-3 publication-subtitle">
</a>
</span>
<span class="link-block">
<a href="https://huggingface.co/datasets/osunlp/Mind2Web" target="_blank"
<a href="https://huggingface.co/datasets/osunlp/SMolInstruct" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<!-- <i class="far fa-images"></i> -->
@@ -164,7 +164,18 @@ <h2 class="subtitle is-3 publication-subtitle">
</a>
</span>
<span class="link-block">
<a href="https://github.com/OSU-NLP-Group/SeeAct" target="_blank"
<a href="https://huggingface.co/osunlp/LlaSMol-Mistral-7B" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<!-- <i class="far fa-images"></i> -->
<p style="font-size:18px">🤗</p>
<!-- 🔗 -->
</span>
<span>Model</span>
</a>
</span>
<span class="link-block">
<a href="https://github.com/OSU-NLP-Group/LlaSMol" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fab fa-github"></i>
@@ -199,16 +210,16 @@ <h2 class="subtitle is-3 publication-subtitle">
<!-- </a>-->
<!-- </span>-->
<!-- Twitter Link. -->
<span class="link-block">
<a href="https://twitter.com/ysu_nlp/status/1742398541660639637" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon has-text-white">
<i class="fa-brands fa-x-twitter"></i>
<!-- <span class="link-block"> -->
<!-- <a href="https://twitter.com/ysu_nlp/status/1742398541660639637" target="_blank" -->
<!-- class="external-link button is-normal is-rounded is-dark"> -->
<!-- <span class="icon has-text-white"> -->
<!-- <i class="fa-brands fa-x-twitter"></i> -->
<!-- <p style="font-size:18px">🌐</p> -->
</span>
<span>Twitter</span>
</a>
</span>
<!-- </span> -->
<!-- <span>Twitter</span> -->
<!-- </a> -->
<!-- </span> -->
</div>
</div>
</div>
@@ -243,7 +254,7 @@ <h2 class="subtitle is-3 publication-subtitle">
for training and evaluating LLMs for chemistry. Based on SMolInstruct, we
fine-tune a set of open-source LLMs, among which, we find that Mistral serves as
the best base model for chemistry tasks. We further conduct analysis on the impact
of trainable parameters, providing insights for future research
of trainable parameters, providing insights for future research.
</p>

</div>
@@ -255,8 +266,8 @@ <h2 class="subtitle is-3 publication-subtitle">
<section class="hero is-light is-small">
<div class="hero-body has-text-centered">
<h1 class="title is-1 mmmu">
<img src="/static/images/llasmol.svg" style="width:1em;vertical-align: middle" alt="Logo" />
<span class="mmmu" style="vertical-align: middle">Tasks</span>
<!-- <img src="/static/images/llasmol.svg" style="width:1em;vertical-align: middle" alt="Logo" /> -->
<span class="mmmu" style="vertical-align: middle">The SMolInstruct Dataset</span>
</h1>
</div>
</section>
@@ -265,17 +276,34 @@ <h1 class="title is-1 mmmu">
<div class="container">
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3">SMolInstruct Dataset</h2>
<!-- <h2 class="title is-3">Overview of SMolInstruct</h2> -->
<div class="content has-text-justified">
<p>
LlaSMol models are fine-tuned on SMolInstruct,
a dataset containing 14 meticulously selected chemistry
tasks and <strong>over 3 million</strong> high-quality
samples.
SMolInstruct is an instruction dataset for chemistry that focuses on small molecules.
It contains <strong>14 meticulously selected tasks</strong> and <strong>over 3M carefully curated samples</strong>.
The following image illustrates the tasks and corresponding samples.
</p>
<div class="content has-text-centered">
<img src="static/images/ChemLLMFig.svg" alt="14 tasks" class="center" style="width: 100%; height: auto;">
</div>
<p>
<strong>The merits of SMolInstruct</strong>:
</p>
<p>
(1) <strong>Large-Scale</strong>. SMolInstruct consists of 3.4M distinct samples and 1.6M distinct molecules
with a diverse range of sizes, structures, and properties, showcasing
extensive coverage of chemical knowledge.
</p>
<p>
(2) <strong>Comprehensive</strong>. SMolInstruct contains 4 types of chemical tasks (14 tasks in total), emerging
as the most comprehensive instruction tuning dataset for small molecules. Notably, the tasks are
meticulously selected to build a strong chemistry foundation.
</p>
<p>
(3) <strong>High-Quality</strong>. Rigorous processing steps have been implemented to exclude problematic and
low-quality samples. Along with careful data splitting and canonicalization of SMILES representations,
SMolInstruct stands as a high-quality resource valuable for future research.
</p>
</div>
</div>
</div>
@@ -287,8 +315,8 @@ <h2 class="title is-3">SMolInstruct Dataset</h2>
<section class="hero is-light is-small">
<div class="hero-body has-text-centered">
<h1 class="title is-1 mmmu">
<img src="static/images/llasmol.svg" style="width:1em;vertical-align: middle" alt="Logo" />
<span class="mmmu" style="vertical-align: middle">Experiments and Results</span>
<!-- <img src="static/images/llasmol.svg" style="width:1em;vertical-align: middle" alt="Logo" /> -->
<span class="mmmu" style="vertical-align: middle">The LlaSMol Models</span>
</h1>
</div>
</section>
@@ -297,47 +325,60 @@ <h1 class="title is-1 mmmu">
<div class="container">
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3"></h2>
<!-- <h2 class="title is-3">Overview of SMolInstruct</h2> -->
<div class="content has-text-justified">
<div id="results-carousel" class="carousel results-carousel">
<div class="box m-5">
<div class="content has-text-centered">
<img src="static/images/tables/o_1.png" alt="TODO" width="80%" />
<p> TODO: description </p>
</div>
</div>

<div class="box m-5">
<div class="content has-text-centered">
<img src="static/images/tables/o_2.png" alt="TODO" width="80%" />
<p> TODO: description </p>
</div>
</div>
<p>
LlaSMol is a series of LLMs built to perform a variety of chemistry tasks.
Specifically, we use Galactica, Llama 2, Code Llama, and Mistral as the base models, and conduct instruction tuning with LoRA
on our SMolInstruct dataset. The resulting models are named <strong>LlaSMol<sub>Galactica</sub></strong>,
<strong>LlaSMol<sub>Llama 2</sub></strong>, <strong>LlaSMol<sub>Code Llama</sub></strong>, and <strong>LlaSMol<sub>Mistral</sub></strong>,
respectively.
</p>
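As a side note, the LoRA technique mentioned above keeps each base weight matrix W frozen and learns a low-rank additive update scaled by alpha/r, so the effective weight after merging is W + (alpha/r) * B A. Below is a minimal pure-Python sketch with toy dimensions; the matrices, ranks, and values are hypothetical and stand in for what, in actual training, lives inside a 7B-parameter transformer handled by a library such as peft.

```python
def matmul(X, Y):
    # Multiply two matrices represented as lists of rows.
    return [[sum(x * y for x, y in zip(row, col)) for col in zip(*Y)]
            for row in X]

def lora_delta(A, B, alpha, r):
    # LoRA's additive update: (alpha / r) * B @ A,
    # where B is d_out x r and A is r x d_in.
    scale = alpha / r
    return [[scale * v for v in row] for row in matmul(B, A)]

def lora_weight(W, A, B, alpha, r):
    # Effective weight after merging the low-rank update into the frozen W.
    delta = lora_delta(A, B, alpha, r)
    return [[w + d for w, d in zip(w_row, d_row)]
            for w_row, d_row in zip(W, delta)]

# Toy example: d_out = d_in = 2, rank r = 1.
W = [[1.0, 0.0], [0.0, 1.0]]   # frozen base weight
B = [[1.0], [2.0]]             # d_out x r
A = [[3.0, 4.0]]               # r x d_in
print(lora_weight(W, A, B, alpha=2, r=1))  # [[7.0, 8.0], [12.0, 17.0]]
```

Only A and B (2r * d parameters per matrix, instead of d * d) are trained, which is why the tuned-parameter count stays small.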
</div>
</div>
</div>
</div>
</section>

<div class="box m-5">
<div class="content has-text-centered">
<img src="static/images/element_grounding_examples/example_image_annotation.svg"
alt="algebraic reasoning" width="50%" />
<p> An example of grounding via image annotation.</p>
</div>
</div>

<div class="box m-5">
<div class="content has-text-centered">
<img src="static/images/element_grounding_examples/example_element_attributes_1.svg"
alt="algebraic reasoning" width="50%" />
<p> An example of action generation in grounding via element attributes.</p>
</div>
</div>
<section class="hero is-light is-small">
<div class="hero-body has-text-centered">
<h1 class="title is-1 mmmu">
<!-- <img src="/static/images/llasmol.svg" style="width:1em;vertical-align: middle" alt="Logo" /> -->
<span class="mmmu" style="vertical-align: middle">Experiment</span>
</h1>
</div>
</section>

<div class="box m-5">
<div class="content has-text-centered">
<img src="static/images/element_grounding_examples/example_element_attributes_2.svg"
alt="algebraic reasoning" width="50%" />
<p> An example of grounding via element attributes after action generation. </p>
</div>
</div>
</div>
<section class="section">
<div class="container">
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<!-- <h2 class="title is-3">Overview of SMolInstruct</h2> -->
<div class="content has-text-justified">
<p>
We comprehensively compare our LlaSMol models with existing LLMs as well as task-specific, non-LLM-based SoTA models.
The main results are shown in the following tables.
</p>
</div>
<div class="content has-text-centered">
<p style="text-align:left;font-size:15px"> Results for name conversion (NC) and property prediction (PP) tasks. The metrics include exact match (EM), validity (Valid),
root mean square error (RMSE), and accuracy (Acc), where EM and Valid are in percentage. </p>
<img src="static/images/tables/o_1.png" alt="TODO" width="100%" />
<p></p>
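For reference, the exact match (EM) and RMSE metrics in the caption above can be sketched as follows. This is a simplified illustration, not the paper's evaluation code; in particular, EM for molecule outputs is assumed to be computed on canonicalized SMILES strings, and that canonicalization step is not shown here.

```python
import math

def exact_match(preds, golds):
    # Fraction of predictions identical to the reference string.
    # For SMILES outputs, both sides are assumed to be canonicalized first.
    return sum(p == g for p, g in zip(preds, golds)) / len(golds)

def rmse(preds, golds):
    # Root mean square error for numeric property prediction.
    return math.sqrt(sum((p - g) ** 2 for p, g in zip(preds, golds)) / len(golds))

# Hypothetical predictions vs. references.
print(exact_match(["CCO", "C"], ["CCO", "CC"]))  # 0.5
print(rmse([1.0, 2.0], [1.0, 4.0]))              # 1.414...
```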
<p style="text-align:left;font-size:15px"> Results for molecule captioning (MC), molecule generation (MG), forward synthesis (FS), and retrosynthesis (RS).
The metrics include METEOR score (METEOR), exact match (EM), Morgan fingerprint-based Tanimoto similarity (FTS), and validity (Valid),
where EM, FTS, and Valid are in percentage. </p>
<img src="static/images/tables/o_2.png" alt="TODO" width="100%" />
<p></p>
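The Morgan fingerprint-based Tanimoto similarity (FTS) used above compares two molecules' fingerprint bit sets as |intersection| / |union|. A minimal sketch over precomputed on-bit index sets follows; real fingerprints would come from a cheminformatics toolkit such as RDKit, and the toy bit sets below are hypothetical.

```python
def tanimoto(fp1, fp2):
    # Tanimoto similarity between two fingerprints given as sets of
    # on-bit indices: |intersection| / |union|.
    a, b = set(fp1), set(fp2)
    if not a and not b:
        return 1.0  # convention: two empty fingerprints count as identical
    return len(a & b) / len(a | b)

# Toy on-bit sets standing in for Morgan fingerprints of two molecules.
print(tanimoto({1, 3, 5, 8}, {1, 3, 7}))  # 0.4
```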
<p style="text-align:left"><strong>Main takeaways:</strong></p>
<p style="text-align:left">(1) LlaSMol models significantly outperform the existing LLMs on all the tasks,
underscoring the effectiveness of the proposed SMolInstruct dataset and the benefits of fine-tuning.</p>
<p style="text-align:left">(2) Our four LlaSMol models show substantial differences in their performance, and LlaSMol<sub>Mistral</sub> performs the best, emphasizing
the significant impact of base models on downstream tasks.</p>
<p style="text-align:left">(3) Our LlaSMol models exhibit comparable performance to SoTA models even with only a small proportion of parameters being tuned (40M, 0.59%),
showing great potential to surpass task-specific models and work as universal models capable of addressing multiple chemistry tasks.</p>
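As a rough sanity check on takeaway (3): a 0.59% trainable fraction corresponds to a 40M-parameter adapter over a base model of roughly 6.8B parameters. The 6.8B total below is a hypothetical figure chosen only to make the arithmetic concrete.

```python
def trainable_pct(trainable, total):
    # Percentage of parameters updated during LoRA fine-tuning.
    return 100.0 * trainable / total

# Hypothetical total parameter count for a 7B-class base model.
print(round(trainable_pct(40e6, 6.8e9), 2))  # 0.59
```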
</div>
</div>
</div>
@@ -346,9 +387,18 @@ <h2 class="title is-3"></h2>


<!-- @Botao TODO: update bibtex -->
<section class="hero is-light is-small">
<div class="hero-body has-text-centered">
<h1 class="title is-1 mmmu">
<!-- <img src="/static/images/llasmol.svg" style="width:1em;vertical-align: middle" alt="Logo" /> -->
<span class="mmmu" style="vertical-align: middle">Citation</span>
</h1>
</div>
</section>
<section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title is-3 has-text-centered">BibTeX</h2>
<!-- <h2 class="title is-3 has-text-centered">Citation</h2> -->
<p>If our paper or related resources prove valuable to your research, we kindly ask that you cite them. Please feel free to contact us with any inquiries.</p>
<pre><code>
@article{yu2024llasmol,
title={LlaSMol: Advancing Large Language Models for Chemistry with a Large-Scale, Comprehensive, High-Quality Instruction Tuning Dataset},
19 changes: 10 additions & 9 deletions static/images/llasmol.svg
