-
Notifications
You must be signed in to change notification settings - Fork 0
/
manuscript.html
1999 lines (1854 loc) · 141 KB
/
manuscript.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>An example of automating data sharing through authoring tools</title>
<!-- 2016-04-18 Mon 19:25 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="generator" content="Org-mode" />
<style type="text/css">
<!--/*--><![CDATA[/*><!--*/
.title { text-align: center; }
.todo { font-family: monospace; color: red; }
.done { color: green; }
.tag { background-color: #eee; font-family: monospace;
padding: 2px; font-size: 80%; font-weight: normal; }
.timestamp { color: #bebebe; }
.timestamp-kwd { color: #5f9ea0; }
.right { margin-left: auto; margin-right: 0px; text-align: right; }
.left { margin-left: 0px; margin-right: auto; text-align: left; }
.center { margin-left: auto; margin-right: auto; text-align: center; }
.underline { text-decoration: underline; }
#postamble p, #preamble p { font-size: 90%; margin: .2em; }
p.verse { margin-left: 3%; }
pre {
border: 1px solid #ccc;
box-shadow: 3px 3px 3px #eee;
padding: 8pt;
font-family: monospace;
overflow: auto;
margin: 1.2em;
}
pre.src {
position: relative;
overflow: visible;
padding-top: 1.2em;
}
pre.src:before {
display: none;
position: absolute;
background-color: white;
top: -10px;
right: 10px;
padding: 3px;
border: 1px solid black;
}
pre.src:hover:before { display: inline;}
/* Hover labels naming the language of a source block. A language with no
   rule here has no `content`, so no label is generated for it. The rule for
   src-python covers the Python listings used in this document. */
pre.src-sh:before { content: 'sh'; }
pre.src-bash:before { content: 'sh'; }
pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
pre.src-python:before { content: 'Python'; }
pre.src-R:before { content: 'R'; }
pre.src-perl:before { content: 'Perl'; }
pre.src-java:before { content: 'Java'; }
pre.src-sql:before { content: 'SQL'; }
table { border-collapse:collapse; }
caption.t-above { caption-side: top; }
caption.t-bottom { caption-side: bottom; }
td, th { vertical-align:top; }
th.right { text-align: center; }
th.left { text-align: center; }
th.center { text-align: center; }
td.right { text-align: right; }
td.left { text-align: left; }
td.center { text-align: center; }
dt { font-weight: bold; }
.footpara:nth-child(2) { display: inline; }
.footpara { display: block; }
.footdef { margin-bottom: 1em; }
.figure { padding: 1em; }
.figure p { text-align: center; }
.inlinetask {
padding: 10px;
border: 2px solid gray;
margin: 10px;
background: #ffffcc;
}
#org-div-home-and-up
{ text-align: right; font-size: 70%; white-space: nowrap; }
textarea { overflow-x: auto; }
.linenr { font-size: smaller }
.code-highlighted { background-color: #ffff00; }
.org-info-js_info-navigation { border-style: none; }
#org-info-js_console-label
{ font-size: 10px; font-weight: bold; white-space: nowrap; }
.org-info-js_search-highlight
{ background-color: #ffff00; color: #000000; font-weight: bold; }
/*]]>*/-->
</style>
<style>.abstract {color: black;}</style>
<script type="text/javascript">
/*
@licstart The following is the entire license notice for the
JavaScript code in this tag.
Copyright (C) 2012-2013 Free Software Foundation, Inc.
The JavaScript code in this tag is free software: you can
redistribute it and/or modify it under the terms of the GNU
General Public License (GNU GPL) as published by the Free Software
Foundation, either version 3 of the License, or (at your option)
any later version. The code is distributed WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
As additional permission under GNU GPL version 3 section 7, you
may distribute non-source (e.g., minimized or compacted) forms of
that code without the copy of the GNU GPL normally required by
section 4, provided you include this license notice and a URL
through which recipients can access the Corresponding Source.
@licend The above is the entire license notice
for the JavaScript code in this tag.
*/
<!--/*--><![CDATA[/*><!--*/
// Highlight `elem` together with the element whose id is `id`.
// The class names both elements had beforehand are stored on `elem`
// (cacheClassElem / cacheClassTarget) so CodeHighlightOff can restore them.
// Nothing happens when no element with that id exists.
function CodeHighlightOn(elem, id)
{
  var linked = document.getElementById(id);
  if (linked == null) {
    return;
  }
  var highlightClass = "code-highlighted";
  elem.cacheClassElem = elem.className;
  elem.cacheClassTarget = linked.className;
  linked.className = highlightClass;
  elem.className = highlightClass;
}
// Undo CodeHighlightOn: restore the class names cached on `elem`.
//   elem - element carrying cacheClassElem / cacheClassTarget
//   id   - id of the linked element whose class should be restored
// If CodeHighlightOn never cached anything on `elem`, nothing is changed.
function CodeHighlightOff(elem, id)
{
  var target = document.getElementById(id);
  // Compare with undefined instead of testing truthiness: an element that
  // had no class caches "" and must still be reset, otherwise it would
  // stay highlighted.
  if (elem.cacheClassElem !== undefined)
    elem.className = elem.cacheClassElem;
  // The linked element may not exist; skip it rather than throw.
  if (null != target && elem.cacheClassTarget !== undefined)
    target.className = elem.cacheClassTarget;
}
/*]]>*///-->
</script>
<script type="text/javascript" src="http://orgmode.org/mathjax/MathJax.js"></script>
<script type="text/javascript">
<!--/*--><![CDATA[/*><!--*/
// Configure MathJax for this page. The guard skips configuration when the
// MathJax loader script did not load, instead of raising a ReferenceError.
if (typeof MathJax !== "undefined") {
  MathJax.Hub.Config({
    // Only one of the two following lines, depending on user settings
    // First allows browser-native MathML display, second forces HTML/CSS
    // config: ["MMLorHTML.js"], jax: ["input/TeX"],
    jax: ["input/TeX", "output/HTML-CSS"],
    extensions: ["tex2jax.js","TeX/AMSmath.js","TeX/AMSsymbols.js",
                 "TeX/noUndefined.js"],
    tex2jax: {
      inlineMath: [ ["\\(","\\)"] ],
      displayMath: [ ['$$','$$'], ["\\[","\\]"], ["\\begin{displaymath}","\\end{displaymath}"] ],
      skipTags: ["script","noscript","style","textarea","pre","code"],
      ignoreClass: "tex2jax_ignore",
      processEscapes: false,
      processEnvironments: true,
      preview: "TeX"
    },
    showProcessingMessages: true,
    displayAlign: "center",
    displayIndent: "2em",
    "HTML-CSS": {
      scale: 100,
      availableFonts: ["STIX","TeX"],
      preferredFont: "TeX",
      webFont: "TeX",
      imageFont: "TeX",
      // No trailing comma after the last property: pre-ES5 engines reject
      // it, and the MMLorHTML block below lists MSIE explicitly.
      showMathMenu: true
    },
    MMLorHTML: {
      prefer: {
        MSIE: "MML",
        Firefox: "MML",
        Opera: "HTML",
        other: "HTML"
      }
    }
  });
}
/*]]>*///-->
</script>
</head>
<body>
<div id="content">
<h1 class="title">An example of automating data sharing through authoring tools</h1>
John R. Kitchin, Ana E. Van Gulick, and Lisa D. Zilinski
<div class="abstract">
<p>
In the current scientific publishing landscape there is a need for an authoring workflow that easily integrates data and code into manuscripts and that enables the data and code to be published in reusable form. Automated embedding of data and code into published output will enable superior communication and data archiving. In this work we demonstrate a proof of concept for a workflow, org-mode, which successfully provides this authoring capability and workflow integration. We illustrate this concept in a series of examples for potential uses of this workflow. First, we use data on citation counts to compute the h-index of an author, and show two code examples for calculating the h-index. The source for each example is automatically embedded in the PDF during the export of the document. We demonstrate how data can be embedded in image files, which themselves are embedded in the document. Finally, metadata about the embedded files can be automatically included in the exported PDF, and accessed by computer programs. In our customized export, we embedded metadata about the attached files in the PDF in an Info field. A computer program could parse this output to get a list of embedded files and carry out analyses on them. Authoring tools such as Emacs + org-mode can greatly facilitate the integration of data and code into technical writing. These tools can also automate the embedding of data into document formats intended for consumption.
</p>
<p>
<b>Keywords:</b> data sharing, embedding, org-mode, authoring
</p>
</div>
<div id="outline-container-sec-1" class="outline-2">
<h2 id="sec-1"><span class="section-number-2">1</span> Introduction</h2>
<div class="outline-text-2" id="text-1">
<p>
Motivation for sharing research outputs has accelerated in the past few years with the increase of funding agency requirements and government policies <a class='org-ref-reference' href="#whitmire-2015-table-summar">whitmire-2015-table-summar</a>. Sharing research data and scholarship has moved to the forefront of these funders’ attention due to the government’s increased focus on maximizing return on investment of their research programs <a class='org-ref-reference' href="#zilinski-2014-evolut-data">zilinski-2014-evolut-data</a>. Additionally, publishers are increasingly requiring the sharing of research data, software code, analysis pipelines, and other supplemental documentation. However, there is little to no guidance on what information needs to be shared, how the information should be shared, how these materials should be linked to one another and any associated publications, or how to pay for depositing and preserving the information. This shift has created a need to investigate publishing and authoring tools and workflows that facilitate the integration of data and code into technical writing in order to support researchers in the publication, dissemination, and accessibility of these research products.
</p>
<p>
Currently, translating data and analysis tools from the research bench to the published paper has been made difficult by the workflows implemented by publishers. While it is common to include supporting information files with published manuscripts, they are not required, and there is no standard format or guidelines for what should be in them. For example, while Elsevier allows for the enrichment of publications through its Content Innovation <a class='org-ref-reference' href="#content-conten-innov">content-conten-innov</a> by allowing for the inclusion of interactive content (e.g. U3D models, maps, datasets, and audio files), there is no metadata specific to the additional content nor is there a way to find out which articles contain interactive files. Nature allows for the inclusion of extended data figures and tables, but only allows a maximum of ten items per paper <a class='org-ref-reference' href="#nature-manus-format-guide">nature-manus-format-guide</a>.
</p>
<p>
Notably, in an Elsevier pilot project, The Executable Paper, the computer science community was challenged to "address the question of how to reproduce computational results within the confines of the research article" <a class='org-ref-reference' href="#2015-execut-paper">2015-execut-paper</a>. Although the project ended in 2015, the code and datasets are still available with the online publications. Elsevier has acknowledged the challenge of integrating executable code and data into the current publishing workflow, but has yet to fully address the needs of the research community.
</p>
<p>
In the current publishing landscape, data and analysis protocols or code are most often included in a published paper or its supplementary materials as part of a PDF. When the data is in tabular or graphical form, it is not easy, or in some cases not even possible, to reuse the data without error-prone practices of copy and paste, or digitization. Even when the data is accessible, and downloadable by a researcher, it loses the context of the paper. It is also not discoverable if the data does not have any metadata or description of its own, or if the metadata of the original work does not indicate that there is data within. A build process for this embedded data that can be easily integrated into a research workflow would be valuable for both the research and publishing communities.
</p>
<p>
There are some existing tools that enable the integration of code and data in a narrative context. For example, the iPython notebook and its successor Jupyter <a class='org-ref-reference' href="#perez-2007-ipyth">perez-2007-ipyth</a>,<a class='org-ref-reference' href="#projec-juypy">projec-juypy</a>,<a class='org-ref-reference' href="#shen-2014-inter">shen-2014-inter</a> and the MatLab notebook <a class='org-ref-reference' href="#mathworks-matlab-noteb">mathworks-matlab-noteb</a> have notebook-like capabilities. The commercial tools Mathematica and Maple also have notebook features. Still other approaches exist for specific applications and tools <a class='org-ref-reference' href="#leveque-2012-reprod">leveque-2012-reprod</a>,<a class='org-ref-reference' href="#stodden-2012-reprod-resear">stodden-2012-reprod-resear</a>,<a class='org-ref-reference' href="#stodden-2013-best-pract">stodden-2013-best-pract</a>. These tools are rarely used, however, to write full papers. The R community has developed a software package known as Sweave <a class='org-ref-reference' href="#sweave">sweave</a> which enables R code to be embedded in LaTeX documents. Many of these ideas can be traced back to the early ideas of literate programming by Knuth <a class='org-ref-reference' href="#knuth-1984-liter-progr">knuth-1984-liter-progr</a>,<a class='org-ref-reference' href="#knuth-1992-liter-progr">knuth-1992-liter-progr</a>.
</p>
<p>
The ActivePapers project <a class='org-ref-reference' href="#hinsen-2015-activ">hinsen-2015-activ</a> is a more aggressive approach than a simple notebook. This approach views a "paper" as a file in HDF5 format, which enables data to be referenced by the article DOI and HDF5 path to the object. Code is stored in the file in a bytecode form that can run in a Java virtual machine. A python implementation was also considered in that work. The authors noted in that work that "there is no straightforward way to adapt legacy software to such a framework."
</p>
<p>
Similarly, the Research Object project <a class='org-ref-reference' href="#research-object">research-object</a> is creating tools for making archives of code, data and narrative writing, as well as tools for interacting with these archives. Their vision looks beyond the PDF as a format for publication. At the time of this writing, they provide software packages in Python, Java, and Ruby for working with their ideas.
</p>
<p>
Despite the number of partial solutions developed, and decades of effort, none of these solutions has achieved wide-spread use. Some solutions have solved problems that are too narrow for general use, e.g. programming language specific solutions only address the needs of a sub-community. Other solutions while technically feasible require too large a change of behavior to enable wide-spread adoption. In the notebook solutions, tools have not been created that allow the notebook to smoothly transition to the manuscript. There is usually a transition from one tool (the notebook) to another (the manuscript preparation tool). The manuscript preparation tools, e.g. Word, or a text editor, do not typically provide functionality to help with publishing code/data.
</p>
<p>
A workflow that integrates data into manuscript preparation, and that automates embedding of data and code into published output will enable superior communication and data archival. We believe that new authoring tools and workflows will be required to enable this. Here we demonstrate feasibility for embedded data as a proof of concept for a seamless writing and building process. In our proof of concept, we use a lightweight text markup language called org-mode <a class='org-ref-reference' href="#Dominik201408">Dominik201408</a> with a powerful text editor Emacs. This tool chain can be integrated throughout all the stages of research and manuscript preparation. At this time, Emacs + org-mode provides all the functionality needed for the demonstration, but other tool chains could be adapted to provide similar functionality.
</p>
<p>
org-mode is a light-weight text markup language that integrates narrative text, equations, figures, tables, and code into a single document <a class='org-ref-reference' href="#Dominik201408">Dominik201408</a>,<a class='org-ref-reference' href="#schulte-2011-activ-docum">schulte-2011-activ-docum</a>,<a class='org-ref-reference' href="#schulte-2012-multi-languag">schulte-2012-multi-languag</a>,<a class='org-ref-reference' href="#kitchin-2015-data-surfac-scien">kitchin-2015-data-surfac-scien</a>,<a class='org-ref-reference' href="#kitchin-2015-examp">kitchin-2015-examp</a>. Emacs provides a library of code that can parse an org document into a data structure, and then export the data structure to another document format, e.g. LaTeX, HTML, markdown, etc., much like XSLT can transform XML to other formats. The export can be customized to get precisely the desired output, as well as new output formats. This customization is essential, as it will enable the <i>automatic</i> embedding of data in the output files. Notably, Emacs provides an authoring environment to write org documents in mostly plain text, and in this environment the documents contain executable code blocks, sortable tables, and hyperlinked text integrated with the narrative text of the document.
</p>
<p>
org-mode documents contain "data". The tables and source code blocks in an org-mode document can literally be used as a source of data in code blocks. In the standard conversion of an org document to HTML or PDF (via LaTeX), they are converted to HTML or LaTeX tables, or syntax highlighted code representations, which are not easily read by a machine for reuse as data. These are human readable, but direct reuse of the data and code is limited to copy and paste operations, or tedious parsing. It is possible, however, to customize the export of a document, and to fine tune the export of each element in an org document. In this manuscript, we show how the contents of a table can be written to a comma-separated value file, and subsequently embedded in a PDF, or linked to in an HTML file. Similarly, each code block can be written to a source file, and embedded in a PDF or linked to in an HTML file. All of this can be automated to occur at the document export stage, requiring no additional work by the author to embed and subsequently share the data.
</p>
<p>
The approach is not unique to org-mode. A Matlab m-file can be "published" to XML and then transformed via XSLT to a variety of formats including HTML and PDF. Through a custom markup language narrative text, LaTeX equations, and figures can be embedded in comments in the m-file. IPython <a class='org-ref-reference' href="#perez-2007-ipyth">perez-2007-ipyth</a> and Jupyter <a class='org-ref-reference' href="#projec-jupy">projec-jupy</a> notebooks can also be converted from their native formats to other document formats. Both of these examples share the idea of exporting the working version of a document to a final version designed for consumption, and both could implement the ideas posed in this paper. Neither example, however, is as flexible as org-mode is in integrating all of the components needed in scientific publishing.
</p>
<p>
In this paper, we illustrate our ideas in a series of domain-general examples. First, we use data on citation counts in a table to compute the h-index of an author. In the supporting information version of the manuscript the data in this table will be stored as a comma-separated value file in the PDF. We show two code examples for calculating the h-index, and the source for each example is automatically embedded in the PDF during the export of the document. We show how data can be embedded in image files, which themselves are embedded in the document. Finally, we show how metadata about the embedded files can be included in the exported PDF, and accessed by computer programs.
</p>
</div>
</div>
<div id="outline-container-sec-2" class="outline-2">
<h2 id="sec-2"><span class="section-number-2">2</span> Methods and results</h2>
<div class="outline-text-2" id="text-2">
<p>
We first illustrate the embedding of data and code with a simple example of computing the h-index of an author. "A scientist has index \(h\) if \(h\) of his or her \(N_p\) papers have at least \(h\) citations each and the other \((N_p - h)\) papers have ≤ \(h\) citations each." <a class='org-ref-reference' href="#hirsch-2005">hirsch-2005</a>. Table <a href="#citation-counts">1</a> shows a list of citation counts for the top 21 cited papers of the first author of this manuscript (Kitchin) in descending order.
</p>
<table id="citation-counts" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
<caption class="t-above"><span class="table-number">Table 1:</span> Rank-ordered list of the top 21 cited papers by Kitchin as of May 20, 2015 (source Scopus).</caption>
<colgroup>
<col class="right" />
<col class="right" />
</colgroup>
<thead>
<tr>
<th scope="col" class="right">index</th>
<th scope="col" class="right"># citations</th>
</tr>
</thead>
<tbody>
<tr>
<td class="right">1</td>
<td class="right">1085</td>
</tr>
<tr>
<td class="right">2</td>
<td class="right">451</td>
</tr>
<tr>
<td class="right">3</td>
<td class="right">372</td>
</tr>
<tr>
<td class="right">4</td>
<td class="right">289</td>
</tr>
<tr>
<td class="right">5</td>
<td class="right">215</td>
</tr>
<tr>
<td class="right">6</td>
<td class="right">108</td>
</tr>
<tr>
<td class="right">7</td>
<td class="right">94</td>
</tr>
<tr>
<td class="right">8</td>
<td class="right">72</td>
</tr>
<tr>
<td class="right">9</td>
<td class="right">49</td>
</tr>
<tr>
<td class="right">10</td>
<td class="right">46</td>
</tr>
<tr>
<td class="right">11</td>
<td class="right">45</td>
</tr>
<tr>
<td class="right">12</td>
<td class="right">42</td>
</tr>
<tr>
<td class="right">13</td>
<td class="right">40</td>
</tr>
<tr>
<td class="right">14</td>
<td class="right">27</td>
</tr>
<tr>
<td class="right">15</td>
<td class="right">26</td>
</tr>
<tr>
<td class="right">16</td>
<td class="right">20</td>
</tr>
<tr>
<td class="right">17</td>
<td class="right">20</td>
</tr>
<tr>
<td class="right">18</td>
<td class="right">18</td>
</tr>
<tr>
<td class="right">19</td>
<td class="right">18</td>
</tr>
<tr>
<td class="right">20</td>
<td class="right">17</td>
</tr>
<tr>
<td class="right">21</td>
<td class="right">16</td>
</tr>
</tbody>
</table><a href="citation-counts.csv">citation-counts.csv</a> <a href="data:text/csv;charset=US-ASCII;base64,ImluZGV4IiwgIiMgY2l0YXRpb25zIgoiMSIsICIxMDg1IgoiMiIsICI0NTEiCiIzIiwgIjM3MiIK
IjQiLCAiMjg5IgoiNSIsICIyMTUiCiI2IiwgIjEwOCIKIjciLCAiOTQiCiI4IiwgIjcyIgoiOSIs
ICI0OSIKIjEwIiwgIjQ2IgoiMTEiLCAiNDUiCiIxMiIsICI0MiIKIjEzIiwgIjQwIgoiMTQiLCAi
MjciCiIxNSIsICIyNiIKIjE2IiwgIjIwIgoiMTciLCAiMjAiCiIxOCIsICIxOCIKIjE5IiwgIjE4
IgoiMjAiLCAiMTciCiIyMSIsICIxNiIK">data uri</a>
<p>
One can see by inspection of Table <a href="#citation-counts">1</a> that the h-index for this set of data is 18. That is to say that in this set of papers, 18 papers have been cited 18 or more times, and every other paper in the set is cited 18 times or fewer. A computer program can also calculate the h-index; for example, Listing <a href="#h-index">h-index</a> shows Emacs-lisp code that does this. We chose Emacs-lisp for this example because in a very compact form, we can <i>read the data</i> from this document, and in a simple loop calculate the h-index. This illustrates the use of a document <i>as a data source</i>. Listing <a href="#h-index-python">h-index-python</a> shows the same algorithm written in Python. A subtle difference in this code is that the <i>data</i> is passed directly from Table <a href="#citation-counts">1</a> to the code <i>within the document</i>. The working version of this document is fundamentally and functionally different from the final version designed for consumption. This is not evident in the published version of this document, but org-mode enables this during manuscript preparation.
</p>
<div class="org-src-container">
<label class="org-src-name">An Emacs-lisp script to calculate the h-index from the data in Table <a href="#citation-counts">1</a>.</label>
<pre class="src src-emacs-lisp" id="h-index">(<span style="color: #0000FF;">let*</span> ((table-data (org-babel-ref-resolve <span style="color: #008000;">"citation-counts"</span>))
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">reads the table from the document we know there is</span>
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">a header, and an hline, so here we delete the hline,</span>
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">and take the rest of the data</span>
(data (cdr (org-babel-del-hlines table-data))))
(format <span style="color: #008000;">"h-index = %s"</span>
(<span style="color: #0000FF;">loop</span> for (index count) in data
until (> index count)
finally return (- index 1))))
</pre>
</div><a href="h-index.elisp">h-index.elisp</a> <a href="data:text/emacs-lisp;charset=US-ASCII;base64,KGxldCogKCh0YWJsZS1kYXRhIChvcmctYmFiZWwtcmVmLXJlc29sdmUgImNpdGF0aW9uLWNvdW50
cyIpKQogICAgICAgOzsgcmVhZHMgdGhlIHRhYmxlIGZyb20gdGhlIGRvY3VtZW50IHdlIGtub3cg
dGhlcmUgaXMKICAgICAgIDs7IGEgaGVhZGVyLCBhbmQgYW4gaGxpbmUsIHNvIGhlcmUgd2UgZGVs
ZXRlIHRoZSBobGluZSwKICAgICAgIDs7IGFuZCB0YWtlIHRoZSByZXN0IG9mIHRoZSBkYXRhCiAg
ICAgICAoZGF0YSAoY2RyIChvcmctYmFiZWwtZGVsLWhsaW5lcyB0YWJsZS1kYXRhKSkpKQogIChm
b3JtYXQgImgtaW5kZXggPSAlcyIKCSAgKGxvb3AgZm9yIChpbmRleCBjb3VudCkgaW4gZGF0YQoJ
CXVudGlsICg+IGluZGV4IGNvdW50KQoJCWZpbmFsbHkgcmV0dXJuICgtIGluZGV4IDEpKSkpCg==">code uri</a>
<pre>RESULTS: h-index = 18
</pre>
<div class="org-src-container">
<label class="org-src-name">A Python script to calculate the h-index from the data in Table <a href="#citation-counts">1</a>.</label>
<pre class="src src-python" id="h-index-python"><span style="color: #0000FF;">for</span> index, count <span style="color: #0000FF;">in</span> data:
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #0000FF;">if</span> index > count:
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #0000FF;">print</span> <span style="color: #008000;">'h-index = {}'</span>.<span style="color: #006FE0;">format</span>(index - 1)
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #0000FF;">break</span>
</pre>
</div><a href="h-index-python.py">h-index-python.py</a> <a href="data:text/python;charset=US-ASCII;base64,Zm9yIGluZGV4LCBjb3VudCBpbiBkYXRhOgogICAgaWYgaW5kZXggPiBjb3VudDoKICAgICAgICBw
cmludCAnaC1pbmRleCA9IHt9Jy5mb3JtYXQoaW5kZXggLSAxKQogICAgICAgIGJyZWFrCg==">code uri</a>
<pre>RESULTS: h-index = 18
</pre>
<p>
A graphical visualization of the h-index is the intersection of a parity line with the citation data. Listing <a href="#h-index-graphical">h-index-graphical</a> shows a Python script that generates a plot to illustrate this, again using the data embedded in the document (Fig. <a href="#fig-hindex">1</a>).
</p>
<div class="org-src-container">
<label class="org-src-name">A Python script to plot the h-index.</label>
<pre class="src src-python" id="h-index-graphical"><span style="color: #0000FF;">import</span> matplotlib.pyplot <span style="color: #0000FF;">as</span> plt
plt.figure(figsize=(3, 4))
<span style="color: #8D8D84;"># </span><span style="color: #8D8D84; font-style: italic;">the citation curve</span>
plt.plot([x[0] <span style="color: #0000FF;">for</span> x <span style="color: #0000FF;">in</span> data], <span style="color: #8D8D84;"># </span><span style="color: #8D8D84; font-style: italic;">the index</span>
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span>[x[1] <span style="color: #0000FF;">for</span> x <span style="color: #0000FF;">in</span> data], <span style="color: #8D8D84;"># </span><span style="color: #8D8D84; font-style: italic;">the citation count</span>
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span>label=<span style="color: #008000;">'Citations'</span>)
<span style="color: #8D8D84;"># </span><span style="color: #8D8D84; font-style: italic;">the parity line</span>
plt.plot([x[0] <span style="color: #0000FF;">for</span> x <span style="color: #0000FF;">in</span> data], <span style="color: #8D8D84;"># </span><span style="color: #8D8D84; font-style: italic;">the index</span>
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span>[x[0] <span style="color: #0000FF;">for</span> x <span style="color: #0000FF;">in</span> data], <span style="color: #8D8D84;"># </span><span style="color: #8D8D84; font-style: italic;">the index</span>
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span> <span style="color: #9B9B9B; background-color: #EDEDED;"> </span>label=<span style="color: #008000;">'parity'</span>)
plt.legend(loc=<span style="color: #008000;">'best'</span>)
plt.ylim([0, 100])
plt.xlabel(<span style="color: #008000;">'index'</span>)
plt.ylabel(<span style="color: #008000;">'Citation count'</span>)
plt.tight_layout()
plt.savefig(<span style="color: #008000;">'h-index.png'</span>, dpi=300)
</pre>
</div><a href="h-index-graphical.py">h-index-graphical.py</a> <a href="data:text/python;charset=US-ASCII;base64,aW1wb3J0IG1hdHBsb3RsaWIucHlwbG90IGFzIHBsdAoKcGx0LmZpZ3VyZShmaWdzaXplPSgzLCA0
KSkKIyB0aGUgY2l0YXRpb24gY3VydmUKcGx0LnBsb3QoW3hbMF0gZm9yIHggaW4gZGF0YV0sICAj
IHRoZSBpbmRleAogICAgICAgICBbeFsxXSBmb3IgeCBpbiBkYXRhXSwgICMgdGhlIGNpdGF0aW9u
IGNvdW50CiAgICAgICAgIGxhYmVsPSdDaXRhdGlvbnMnKQoKIyB0aGUgcGFyaXR5IGxpbmUKcGx0
LnBsb3QoW3hbMF0gZm9yIHggaW4gZGF0YV0sICAjIHRoZSBpbmRleAogICAgICAgICBbeFswXSBm
b3IgeCBpbiBkYXRhXSwgICMgdGhlIGluZGV4CiAgICAgICAgIGxhYmVsPSdwYXJpdHknKQoKcGx0
LmxlZ2VuZChsb2M9J2Jlc3QnKQpwbHQueWxpbShbMCwgMTAwXSkKcGx0LnhsYWJlbCgnaW5kZXgn
KQpwbHQueWxhYmVsKCdDaXRhdGlvbiBjb3VudCcpCnBsdC50aWdodF9sYXlvdXQoKQpwbHQuc2F2
ZWZpZygnaC1pbmRleC5wbmcnLCBkcGk9MzAwKQo=">code uri</a>
<div class="figure">
<p><img src="./h-index.png" alt="h-index.png" />
</p>
<p><span class="figure-number">Figure 1:</span> Visualization of the h-index. The h-index is defined approximately by the index where the intersection of the two lines occurs. <span id="fig-hindex"></span></p>
</div>
<p>
We have illustrated two types of data that can be embedded in this document so far: tabular data and code. There could be other types of data embedded in the document as well. To illustrate the flexibility of this idea, Fig. <a href="#fig-hunt">fig-hunt</a> shows an image of our campus main library. We have used steganography to embed the data from Table <a href="#citation-counts">citation-counts</a> in the form of a csv file in the image. The code that generated this image can be found in the Appendix in Listing <a href="#lst-encode">lst-encode</a>.
</p>
<div class="figure">
<p><img src="./stego-hunt-library.png" alt="stego-hunt-library.png" />
</p>
<p><span class="figure-number">Figure 2:</span> Hunt Library at Carnegie Mellon University. The image has a csv data-file hidden in it using steganography. <span id="fig-hunt"></span></p>
</div>
<p>
Listing <a href="#lst-decode">lst-decode</a> shows a simple example of extracting the data from that image.
</p>
<div class="org-src-container">
<label class="org-src-name">Python script to extract steganography data from an image.</label>
<pre class="src src-python" id="lst-decode"><span style="color: #0000FF;">from</span> steganopy.api <span style="color: #0000FF;">import</span> extract_data_from_stegano_image
<span style="color: #BA36A5;">extracted_data</span> = extract_data_from_stegano_image(
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> image=<span style="color: #008000;">'stego-hunt-library.png'</span>)
<span style="color: #0000FF;">print</span> extracted_data
</pre>
</div><a href="lst-decode.py">lst-decode.py</a> <a href="data:text/python;charset=US-ASCII;base64,ZnJvbSBzdGVnYW5vcHkuYXBpIGltcG9ydCBleHRyYWN0X2RhdGFfZnJvbV9zdGVnYW5vX2ltYWdl
CgpleHRyYWN0ZWRfZGF0YSA9IGV4dHJhY3RfZGF0YV9mcm9tX3N0ZWdhbm9faW1hZ2UoCiAgICBp
bWFnZT0nc3RlZ28taHVudC1saWJyYXJ5LnBuZycpCgpwcmludCBleHRyYWN0ZWRfZGF0YQo=">code uri</a>
<pre class="example">
"index", "# citations"
"1", "1085"
"2", "451"
"3", "372"
"4", "289"
"5", "215"
"6", "108"
"7", "94"
"8", "72"
"9", "49"
"10", "46"
"11", "45"
"12", "42"
"13", "40"
"14", "27"
"15", "26"
"16", "20"
"17", "20"
"18", "18"
"19", "18"
"20", "17"
"21", "16"
</pre>
</div>
<div id="outline-container-sec-2-1" class="outline-3">
<h3 id="sec-2-1"><span class="section-number-3">2.1</span> Exporting the manuscript with automatic data embedding</h3>
<div class="outline-text-3" id="text-2-1">
<p>
During the manuscript export we have the opportunity to execute code for each element of the document. For example, when a table is being exported, we can run code to write the data in the table to a file in some format, e.g. comma-separated values, json or base64-encoded text. Similarly, when a code block is being exported, we have the opportunity to write the code to a file. We can also insert content into the exported document, which makes it easy to embed files in the output. Depending on the output format, e.g. LaTeX or html, we can do different things. We can save information about these files, so that they can be added as metadata to the PDF afterwards. All of this is done automatically. The full code for the export can be found in Section <a href="#export-code">export-code</a>. It is written in emacs-lisp.
</p>
<p>
The key points here are that the embedding is done automatically, and it is highly flexible. The data and code embedded in the document are the <i>actual data and code</i> used in the preparation of the document. This significantly reduces the possibilities to introduce errors by copying the wrong data in, or by modifying external files and neglecting to update the document. The automated approach alleviates the tedium of preparing the files, and in converting them to specific formats. In short, from the author's point of view, one gets this for free once the framework is in place. The original source of the manuscript can also be embedded in the output file.
</p>
</div>
</div>
<div id="outline-container-sec-2-2" class="outline-3">
<h3 id="sec-2-2"><span class="section-number-3">2.2</span> Discovering embedded data</h3>
<div class="outline-text-3" id="text-2-2">
<p>
The Xpdf tools <a class='org-ref-reference' href="#xpdf">xpdf</a> provide command line tools to probe PDF files and extract information from them. For example, one can easily list the attached files in a PDF as shown in Listing <a href="#lst-listpdf">lst-listpdf</a>.
</p>
<div class="org-src-container">
<label class="org-src-name">Command line tool for listing the file attachments in a PDF file. <span id="lst-listpdf"></span></label>
<pre class="src src-sh">pdfdetach -list manuscript.pdf
</pre>
</div><a href="420c5110ed9671a22b09016aa4909575">420c5110ed9671a22b09016aa4909575</a> <a href="data:text/sh;charset=US-ASCII;base64,cGRmZGV0YWNoIC1saXN0IG1hbnVzY3JpcHQucGRmCg==">code uri</a>
<pre class="example">
15 embedded files
1: citation-counts.csv
2: h-index.elisp
3: h-index-python.py
4: h-index-graphical.py
5: lst-decode.py
6: 420c5110ed9671a22b09016aa4909575
7: b0bd6476e33c900bfeeb1f4d11d1b503
8: lst-encode.py
9: table-format.elisp
10: src-block-format.elisp
11: attachfile-link.elisp
12: 1502caf850dfebc22b19c7de804de3a3.elisp
13: 796806ad7fbc93b82453b6044f1738d4.elisp
14: 4847832a1d277d93243f7eef7cfeaf5c.elisp
15: manuscript.org
</pre>
<p>
In our customized export, we embedded metadata about the attached files in the PDF in an Info field called EmbeddedFiles. This can also be probed using pdftk <a class='org-ref-reference' href="#labs-pdftk">labs-pdftk</a> as shown in Listing <a href="#lst-dumpdata">lst-dumpdata</a>. A computer program could parse this output, and get a list of embedded files (or other stored data), and then do subsequent analysis of them. Other formats could be used other than a flat list, including formats suggested by the Open Archives Initiative Object Reuse and Exchange project <a class='org-ref-reference' href="#openarchives">openarchives</a>. We chose a flat list for the proof of concept and simplicity. It is also feasible to store this data in XMP (at least in the PDF), but there are fewer tools available for creating and reading XMP than there are for the Info fields.
</p>
<div class="org-src-container">
<label class="org-src-name">Command to show PDF metadata Info fields. The fold command wraps the output so it is only 45 characters wide. <span id="lst-dumpdata"></span></label>
<pre class="src src-sh">pdftk manuscript-with-embedded-data.pdf dump_data <span style="color: #008000;">\</span>
| grep -A 1 <span style="color: #008000;">"InfoKey: EmbeddedFiles"</span> | fold -w 45
</pre>
</div><a href="efa7045fbc2eeb7d6556814147993919">efa7045fbc2eeb7d6556814147993919</a> <a href="data:text/sh;charset=US-ASCII;base64,cGRmdGsgbWFudXNjcmlwdC13aXRoLWVtYmVkZGVkLWRhdGEucGRmIGR1bXBfZGF0YSBcCnwgZ3Jl
cCAtQSAxICJJbmZvS2V5OiBFbWJlZGRlZEZpbGVzIiB8IGZvbGQgLXcgNDUK">code uri</a>
<pre class="example">
InfoKey: EmbeddedFiles
InfoValue: (manuscript.org eebb5b9a46836aedd9
1641f4b327277c.elisp 6df6bada55b03bf6b71abc9d
c32661d0.elisp 796806ad7fbc93b82453b6044f1738
d4.elisp 2efb34a32a9c4653ff697c1d00fd294b.eli
sp attachfile-link.elisp dafeb6b72e57a1159588
5a79d0ce2cbe.elisp src-block-format.elisp tab
le-format.elisp lst-encode.py 6f43f17d713d8b1
30c9b1f511829ab37 420c5110ed9671a22b09016aa49
09575 lst-decode.py h-index-graphical.py h-in
dex-python.py h-index.elisp citation-counts.c
sv)
</pre>
<p>
It should be evident here that there are a variety of tools to interact with these data files ranging from functionality built into a PDF reader, to command-line utilities, to script programs in a variety of languages, and finally functionality built into a text editor (in our case Emacs). Many of these tools are open-source and freely available.
</p>
<p>
In implementing this novel data workflow there are many considerations about how to make the data embedded in a paper discoverable and ultimately useful for more researchers. This involves collaboration between publishers, researchers, and databases/search engines on an efficient and effective way to implement this workflow and how to tag appropriate research and data.
</p>
</div>
</div>
<div id="outline-container-sec-2-3" class="outline-3">
<h3 id="sec-2-3"><span class="section-number-3">2.3</span> Limitations of this approach for large or complex data sets and codes</h3>
<div class="outline-text-3" id="text-2-3">
<p>
Some data sets may be too large to conveniently embed in a PDF or data URI. It is not easy to define how large is too large, as it is a matter of convenience in some cases, and technical limitations in other cases. For example, it is not convenient to download a 10 GB PDF file, and it may not be possible to open it in some PDF readers. Similarly, it may not be convenient to load a 10 GB HTML page. Data embedding is not the only way to share data; it is simply convenient for some kinds of data. An alternative approach is to provide links to data. The use of linked data is completely compatible with the workflow we describe in this work. If the data is accessible in an external repository, e.g. Figshare <a class='org-ref-reference' href="#figshare">figshare</a>, Zenodo <a class='org-ref-reference' href="#zenodo-zenod">zenodo-zenod</a>, or some other data repository, it is perfectly reasonable to provide links to that data, <i>and</i> the code used to interact with the data, e.g. how it was downloaded, analyzed, etc. The utility of linking depends heavily on the permanence of the links. We have used this approach in one of our own publications <a class='org-ref-reference' href="#xu-suppor">xu-suppor</a> where a large (1.8 GB) dataset was linked to in Zenodo.
</p>
<p>
It is also possible that it is not practical to put all the code into the document. In that case, it is possible to reference some codes, e.g. commercial codes, by a version that would enable others to reproduce the work if they had access to the code. Alternatively, Zenodo and Github <a class='org-ref-reference' href="#github">github</a> make it possible to create archives of open-source code projects that have DOIs associated with them. That makes it possible to even provide links to code repositories.
</p>
<p>
Some datasets may appear to be too complex to conveniently embed. In our work to date, we have not found datasets we could not embed in a practical way. For example, in Ref. <a class='org-ref-reference' href="#hallenbeck-2013-effec-o2">hallenbeck-2013-effec-o2</a> we embedded Excel datasheets into the supporting information PDF file. In Ref. <a class='org-ref-reference' href="#curnan-2014-effec-concen">curnan-2014-effec-concen</a> we embedded a series of comma-separated value files, along with examples of code to create a SQL database file in sqlite, and to query that database to perform the analyses used in the paper. A sqlite database is a flat file format, and could be embedded in a PDF or as a data URI in HTML. In Ref. <a class='org-ref-reference' href="#miller-2014-simul-temper">miller-2014-simul-temper</a> we embedded large tabular datasets into the PDF. These datasets would have made the PDF over 900 pages long if printed in the document, but by embedding them, the document was kept a reasonable size for reading.
</p>
<p>
We have had other research projects where the data is located on a private research computing cluster that can only be accessed from our campus by authorized users. In these cases, our workflow tends to have two parts: one which is local and only reproducible by us, and one that is repeatable by others. In the first part, we construct a dataset that is portable, and usually stored in JSON format. That data file is embedded in the document, and all subsequent analysis uses the data file, which ensures the subsequent analysis is reproducible.
</p>
<p>
It is not possible to generalize our approach to every conceivable research project. We have used it in a broad range of applications, and we have always been able to adapt it as needed. The main workflows for which we envision significant difficulties in adapting it are those that heavily utilize graphical user interfaces (GUI). There are still no good approaches to documenting GUI workflows, where the order of GUI actions may be important, or where it is non-obvious what GUI actions were performed.
</p>
<p>
Finally, a practical limitation of embedding data files is in PDF readers. Embedded data files are part of the PDF standard, but not all readers support them equally. The Preview in Mac OSX, for example, does not support attachment extraction, and Adobe Acrobat will not allow one to extract some types of files, e.g. zip files and executables. There are, however, command-line tools that will extract these attachments <a class='org-ref-reference' href="#labs-pdftk">labs-pdftk</a>.
</p>
<p>
The embedding of data in manuscripts and supporting information does not solve all data-sharing problems. For example, Candela and co-workers note the "difficulties of separating the data from the rest of the material and reusing them" <a class='org-ref-reference' href="#candela-2015-data-journ">candela-2015-data-journ</a>. For data that is only available as a table in PDF, this observation is correct. For data that is an org-mode table, however, it is comparatively easy to separate the data (and code) from the manuscript using computational tools. The second drawback they note is that it is not possible for readers to "find and link data independently of the main publication" if it is in supporting information. This is partially true. Supporting information files are not currently indexed. Readers will find the data by reading the main publication and supporting information if it is prepared as we propose. They will also learn how the data was used in the original work. We see this as a feature of our proof of concept; the readers would cite the main publication if they use the data in their work. This is important because "getting credit" for the data has been identified as an important requirement for enabling and promoting a data-sharing culture among scientists <a class='org-ref-reference' href="#reilly-2011-repor-integ">reilly-2011-repor-integ</a>.
</p>
</div>
</div>
<div id="outline-container-sec-2-4" class="outline-3">
<h3 id="sec-2-4"><span class="section-number-3">2.4</span> Is Emacs + org-mode necessary for this?</h3>
<div class="outline-text-3" id="text-2-4">
<p>
We have implemented our approach in Emacs and org-mode because these tools made it possible to implement the approach today. They made it possible because org-mode can parse a document into a data structure that contains recognizable elements such as code blocks, tables and links. Furthermore, org-mode provides the machinery to transform those elements into new, customizable formats such as LaTeX and HTML. org-mode also provides the executable code capability, ensuring that the code in the manuscript is the code that was used for the analysis. Finally, the machinery is deeply integrated into Emacs, enabling the full automation of the approach. In the end, the approach leverages tools that are available today and that are compatible with current publishing standards.
</p>
<p>
Other tool chains could be adapted to do this as well. Any tool chain where a document can be represented in a structured format of elements, and where elements can be transformed could be adapted at least to some extent to the approach we have described here. For example, modern Microsoft Word documents are stored in xml, and it is conceivable that Visual Basic could be used to create plugins that enable the approach we examine here. A tool chain that could parse LaTeX documents into a data structure could modify the document during the build process to embed data. Other editors that are extensible could develop automation solutions similar to what we have described here. There are a growing number of org-mode parsers in Ruby, nodejs, Python, and other languages <a class='org-ref-reference' href="#org-mode-community">org-mode-community</a> that can be leveraged, as well as tools such as Pandoc <a class='org-ref-reference' href="#pandoc">pandoc</a> that provide conversion tools between different formats. While it is technically possible to provide similar functionalities with other tools, we have found Emacs + org-mode to be the most flexible in our hands.
</p>
</div>
</div>
</div>
<div id="outline-container-sec-3" class="outline-2">
<h2 id="sec-3"><span class="section-number-2">3</span> Conclusions</h2>
<div class="outline-text-2" id="text-3">
<p>
The principal idea we have developed is that there are (at least) two versions of most technical documents: a working, functional version that contains data, code, and analysis and a version designed for consumption (often PDF or HTML) that is often derived from the functional version. We have developed a workflow that largely automates the derivation of the consumption version from the functional version, and that automatically embeds the code and data into the consumption version through a conversion (export) process that converts the functional version to the consumption version using org-mode.
</p>
<p>
We have illustrated a set of authoring tools and workflow that enables the automation of data and code embedding in technical documents. Our approach builds on established tools used already, and extends them to provide the means for implementation of the workflow. This workflow is compatible with the existing publication frameworks which require LaTeX, PDF or HTML submissions. Although similar ideas can be implemented in other tools, including iPython/Jupyter notebooks, Matlab, and other extensible environments, to our knowledge none of these are as flexible or powerful as org-mode is. We believe this overall approach is a very promising one for expanding the ease of data sharing among scientists.
</p>
</div>
</div>
<div id="outline-container-sec-4" class="outline-2">
<h2 id="sec-4"><span class="section-number-2">4</span> Appendix</h2>
<div class="outline-text-2" id="text-4">
</div><div id="outline-container-sec-4-1" class="outline-3">
<h3 id="sec-4-1"><span class="section-number-3">4.1</span> Embedding data in images</h3>
<div class="outline-text-3" id="text-4-1">
<p>
We use the steganopy <a class='org-ref-reference' href="#steganopy">steganopy</a> Python package to illustrate the use of steganography to put data in an image. The point is not that steganography is an ideal way to do this, but that our general approach is flexible. The embedded data could be XMP, or other types of metadata.
</p>
<div class="org-src-container">
<label class="org-src-name">Code to generate an image with an embedded csv file in it.</label>
<pre class="src src-python" id="lst-encode"><span style="color: #0000FF;">from</span> steganopy.api <span style="color: #0000FF;">import</span> create_stegano_image
<span style="color: #BA36A5;">stegano_image</span> = create_stegano_image(
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> original_image=<span style="color: #008000;">'hunt-library.png'</span>,
<span style="color: #9B9B9B; background-color: #EDEDED;"> </span> data_to_hide=<span style="color: #008000;">'citation-counts.csv'</span>)
stegano_image.save(<span style="color: #008000;">"stego-hunt-library.png"</span>)
</pre>
</div><a href="lst-encode.py">lst-encode.py</a> <a href="data:text/python;charset=US-ASCII;base64,ZnJvbSBzdGVnYW5vcHkuYXBpIGltcG9ydCBjcmVhdGVfc3RlZ2Fub19pbWFnZQoKc3RlZ2Fub19p
bWFnZSA9IGNyZWF0ZV9zdGVnYW5vX2ltYWdlKAogICAgb3JpZ2luYWxfaW1hZ2U9J2h1bnQtbGli
cmFyeS5wbmcnLAogICAgZGF0YV90b19oaWRlPSdjaXRhdGlvbi1jb3VudHMuY3N2JykKCnN0ZWdh
bm9faW1hZ2Uuc2F2ZSgic3RlZ28taHVudC1saWJyYXJ5LnBuZyIpCg==">code uri</a>
</div>
</div>
<div id="outline-container-sec-4-2" class="outline-3">
<h3 id="sec-4-2"><span class="section-number-3">4.2</span> The custom export code <span id="export-code"></span></h3>
<div class="outline-text-3" id="text-4-2">
<p>
Here we define a custom table exporter. We use the regular table export mechanism, but save the contents of the table as a csv file. We define exports for two backends: LaTeX and HTML. For LaTeX, we use the attachfile <a class='org-ref-reference' href="#pakin-attachfile">pakin-attachfile</a> package to embed the data file in the PDF. For HTML, we insert a link to the data file, and a data uri link to the HTML output. We store the filename of each generated table in a global variable named <code>*embedded-files*</code> so we can create a new Info metadata entry in the exported PDF.
</p>
<div class="org-src-container">
<pre class="src src-emacs-lisp" id="table-format">(<span style="color: #0000FF;">defvar</span> <span style="color: #BA36A5;">*embedded-files*</span> '()
<span style="color: #036A07;">"List of files embedded in the output."</span>)
(<span style="color: #0000FF;">defun</span> <span style="color: #006699;">my-table-format</span> (table contents info)
(<span style="color: #0000FF;">let*</span> ((tblname (org-element-property <span style="color: #006FE0;">:name</span> table))
(tblstart (org-element-property
<span style="color: #006FE0;">:contents-begin</span> table))
(tbl-data (<span style="color: #0000FF;">save-excursion</span>
(goto-char tblstart)
(org-babel-del-hlines
(org-babel-read-table))))
(format (elt (plist-get info <span style="color: #006FE0;">:back-end</span>) 2))
(csv-file (concat tblname <span style="color: #008000;">".csv"</span>))
(data-uri-data))
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">Here we convert the table data to a csv file</span>
(<span style="color: #0000FF;">with-temp-file</span> csv-file
(<span style="color: #0000FF;">loop</span> for row in tbl-data
do
(insert
(mapconcat
(<span style="color: #0000FF;">lambda</span> (x) (format <span style="color: #008000;">"\"%s\""</span> x))
row
<span style="color: #008000;">", "</span> ))
(insert <span style="color: #008000;">"\n"</span>))
(<span style="color: #0000FF;">setq</span> data-uri-data
(base64-encode-string
(buffer-string))))
(add-to-list '*embedded-files* csv-file)
(<span style="color: #0000FF;">cond</span>
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">HTML export</span>
((eq format 'html)
(concat
(org-html-table table contents info)
(format <span style="color: #008000;">"<a href=\"%s\">%s</a>"</span>
csv-file csv-file)
<span style="color: #008000;">" "</span>
(format (concat <span style="color: #008000;">"<a href=\"data:text/csv;"</span>
<span style="color: #008000;">"charset=US-ASCII;"</span>
<span style="color: #008000;">"base64,%s\">data uri</a>"</span>)
data-uri-data)))
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">LaTeX/PDF export</span>
((eq format 'latex)
(concat
(org-latex-table table contents info)
<span style="color: #008000;">"\n"</span>
(format <span style="color: #008000;">"%s: \\attachfile{%s}"</span>
csv-file csv-file))))))
</pre>
</div><a href="table-format.elisp">table-format.elisp</a> <a href="data:text/emacs-lisp;charset=US-ASCII;base64,KGRlZnZhciAqZW1iZWRkZWQtZmlsZXMqICcoKQogICJMaXN0IG9mIGZpbGVzIGVtYmVkZGVkIGlu
IHRoZSBvdXRwdXQuIikKCihkZWZ1biBteS10YWJsZS1mb3JtYXQgKHRhYmxlIGNvbnRlbnRzIGlu
Zm8pCiAgKGxldCogKCh0YmxuYW1lIChvcmctZWxlbWVudC1wcm9wZXJ0eSA6bmFtZSB0YWJsZSkp
CgkgKHRibHN0YXJ0IChvcmctZWxlbWVudC1wcm9wZXJ0eQoJCSAgICA6Y29udGVudHMtYmVnaW4g
dGFibGUpKQoJICh0YmwtZGF0YSAoc2F2ZS1leGN1cnNpb24KCQkgICAgIChnb3RvLWNoYXIgdGJs
c3RhcnQpCgkJICAgICAob3JnLWJhYmVsLWRlbC1obGluZXMKCQkgICAgICAob3JnLWJhYmVsLXJl
YWQtdGFibGUpKSkpCgkgKGZvcm1hdCAoZWx0IChwbGlzdC1nZXQgaW5mbyA6YmFjay1lbmQpIDIp
KQoJIChjc3YtZmlsZSAoY29uY2F0IHRibG5hbWUgIi5jc3YiKSkKCSAoZGF0YS11cmktZGF0YSkp
CgogICAgOzsgSGVyZSB3ZSBjb252ZXJ0IHRoZSB0YWJsZSBkYXRhIHRvIGEgY3N2IGZpbGUKICAg
ICh3aXRoLXRlbXAtZmlsZSBjc3YtZmlsZQogICAgICAobG9vcCBmb3Igcm93IGluIHRibC1kYXRh
CgkgICAgZG8KCSAgICAoaW5zZXJ0CgkgICAgIChtYXBjb25jYXQKCSAgICAgIChsYW1iZGEgKHgp
IChmb3JtYXQgIlwiJXNcIiIgeCkpCgkgICAgICByb3cKCSAgICAgICIsICIgKSkKCSAgICAoaW5z
ZXJ0ICJcbiIpKQogICAgICAoc2V0cSBkYXRhLXVyaS1kYXRhCgkgICAgKGJhc2U2NC1lbmNvZGUt
c3RyaW5nCgkgICAgIChidWZmZXItc3RyaW5nKSkpKQoKICAgIChhZGQtdG8tbGlzdCAnKmVtYmVk
ZGVkLWZpbGVzKiBjc3YtZmlsZSkKCiAgICAoY29uZAogICAgIDs7IEhUTUwgZXhwb3J0CiAgICAg
KChlcSBmb3JtYXQgJ2h0bWwpCiAgICAgIChjb25jYXQKICAgICAgIChvcmctaHRtbC10YWJsZSB0
YWJsZSBjb250ZW50cyBpbmZvKQogICAgICAgKGZvcm1hdCAiPGEgaHJlZj1cIiVzXCI+JXM8L2E+
IgoJICAgICAgIGNzdi1maWxlIGNzdi1maWxlKQogICAgICAgIiAiCiAgICAgICAoZm9ybWF0IChj
b25jYXQgICI8YSBocmVmPVwiZGF0YTp0ZXh0L2NzdjsiCiAgICAgICAgICAgICAgICAgICAgICAg
ICJjaGFyc2V0PVVTLUFTQ0lJOyIKICAgICAgICAgICAgICAgICAgICAgICAgImJhc2U2NCwlc1wi
PmRhdGEgdXJpPC9hPiIpCgkgICAgICAgZGF0YS11cmktZGF0YSkpKQogICAgIDs7IExhVGVYL1BE
RiBleHBvcnQKICAgICAoKGVxIGZvcm1hdCAnbGF0ZXgpCiAgICAgIChjb25jYXQKICAgICAgIChv
cmctbGF0ZXgtdGFibGUgdGFibGUgY29udGVudHMgaW5mbykKICAgICAgICJcbiIKICAgICAgIChm
b3JtYXQgIiVzOiBcXGF0dGFjaGZpbGV7JXN9IgoJICAgICAgIGNzdi1maWxlIGNzdi1maWxlKSkp
KSkpCg==">code uri</a>
<p>
Next, we define an exporter for source blocks. We will write these to a file too, and put links to them in the exported files. We store the filename of each generated source file in a global variable named <code>*embedded-files*</code> so we can create a new Info metadata entry in the exported PDF.
</p>
<div class="org-src-container">
<pre class="src src-emacs-lisp" id="src-block-format">(<span style="color: #0000FF;">defun</span> <span style="color: #006699;">my-src-block-format</span> (src-block contents info)
<span style="color: #036A07;">"Custom export for src-blocks.</span>
<span style="color: #036A07;">Saves code in block for embedding. Provides backend-specific</span>
<span style="color: #036A07;">output."</span>
(<span style="color: #0000FF;">let*</span> ((srcname (org-element-property <span style="color: #006FE0;">:name</span> src-block))
(lang (org-element-property <span style="color: #006FE0;">:language</span> src-block))
(value (org-element-property <span style="color: #006FE0;">:value</span> src-block))
(format (elt (plist-get info <span style="color: #006FE0;">:back-end</span>) 2))
(exts '((<span style="color: #008000;">"python"</span> . <span style="color: #008000;">".py"</span>)
(<span style="color: #008000;">"emacs-lisp"</span> . <span style="color: #008000;">".elisp"</span>)))
(fname (concat
(<span style="color: #0000FF;">or</span> srcname (md5 value))
(cdr (assoc lang exts))))
(data-uri-data))
(<span style="color: #0000FF;">with-temp-file</span> fname
(insert value)
(<span style="color: #0000FF;">setq</span> data-uri-data (base64-encode-string
(buffer-string))))
(add-to-list '*embedded-files* fname)
(<span style="color: #0000FF;">cond</span>
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">HTML export</span>
((eq format 'html)
(concat
(org-html-src-block src-block contents info)
(format <span style="color: #008000;">"<a href=\"%s\">%s</a>"</span> fname fname)
<span style="color: #008000;">" "</span>
(format (concat <span style="color: #008000;">"<a href=\"data:text/%s;"</span>
<span style="color: #008000;">"charset=US-ASCII;base64,"</span>
<span style="color: #008000;">"%s\">code uri</a>"</span>)
lang data-uri-data)))
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">LaTeX/PDF export</span>
((eq format 'latex)
(concat
(org-latex-src-block src-block contents info)
<span style="color: #008000;">"\n"</span>
(format <span style="color: #008000;">"%s: \\attachfile{%s}"</span> fname fname))))))
</pre>
</div><a href="src-block-format.elisp">src-block-format.elisp</a> <a href="data:text/emacs-lisp;charset=US-ASCII;base64,KGRlZnVuIG15LXNyYy1ibG9jay1mb3JtYXQgKHNyYy1ibG9jayBjb250ZW50cyBpbmZvKQogICJD
dXN0b20gZXhwb3J0IGZvciBzcmMtYmxvY2tzLgpTYXZlcyBjb2RlIGluIGJsb2NrIGZvciBlbWJl
ZGRpbmcuIFByb3ZpZGVzIGJhY2tlbmQtc3BlY2lmaWMKb3V0cHV0LiIKICAobGV0KiAoKHNyY25h
bWUgKG9yZy1lbGVtZW50LXByb3BlcnR5IDpuYW1lIHNyYy1ibG9jaykpCgkgKGxhbmcgKG9yZy1l
bGVtZW50LXByb3BlcnR5IDpsYW5ndWFnZSBzcmMtYmxvY2spKQoJICh2YWx1ZSAob3JnLWVsZW1l
bnQtcHJvcGVydHkgOnZhbHVlIHNyYy1ibG9jaykpCiAgICAgICAgIChmb3JtYXQgKGVsdCAocGxp
c3QtZ2V0IGluZm8gOmJhY2stZW5kKSAyKSkKCSAoZXh0cyAnKCgicHl0aG9uIiAuICIucHkiKQoJ
CSAoImVtYWNzLWxpc3AiIC4gIi5lbGlzcCIpKSkKCSAoZm5hbWUgKGNvbmNhdAoJCSAob3Igc3Jj
bmFtZSAobWQ1IHZhbHVlKSkKCQkgKGNkciAoYXNzb2MgbGFuZyBleHRzKSkpKQoJIChkYXRhLXVy
aS1kYXRhKSkKCiAgICAod2l0aC10ZW1wLWZpbGUgZm5hbWUKICAgICAgKGluc2VydCB2YWx1ZSkK
ICAgICAgKHNldHEgZGF0YS11cmktZGF0YSAoYmFzZTY0LWVuY29kZS1zdHJpbmcKCQkJICAgKGJ1
ZmZlci1zdHJpbmcpKSkpCgogICAgKGFkZC10by1saXN0ICcqZW1iZWRkZWQtZmlsZXMqIGZuYW1l
KQoKICAgIChjb25kCiAgICAgOzsgSFRNTCBleHBvcnQKICAgICAoKGVxIGZvcm1hdCAnaHRtbCkK
ICAgICAgKGNvbmNhdAogICAgICAgKG9yZy1odG1sLXNyYy1ibG9jayBzcmMtYmxvY2sgY29udGVu
dHMgaW5mbykKICAgICAgIChmb3JtYXQgIjxhIGhyZWY9XCIlc1wiPiVzPC9hPiIgZm5hbWUgZm5h
bWUpCiAgICAgICAiICIKICAgICAgIChmb3JtYXQgKGNvbmNhdCAiPGEgaHJlZj1cImRhdGE6dGV4
dC8lczsiCiAgICAgICAgICAgICAgICAgICAgICAgImNoYXJzZXQ9VVMtQVNDSUk7YmFzZTY0LCIK
ICAgICAgICAgICAgICAgICAgICAgICAiJXNcIj5jb2RlIHVyaTwvYT4iKQoJICAgICAgIGxhbmcg
ZGF0YS11cmktZGF0YSkpKQogICAgIDs7IExhVGVYL1BERiBleHBvcnQKICAgICAoKGVxIGZvcm1h
dCAnbGF0ZXgpCiAgICAgIChjb25jYXQKICAgICAgIChvcmctbGF0ZXgtc3JjLWJsb2NrIHNyYy1i
bG9jayBjb250ZW50cyBpbmZvKQogICAgICAgIlxuIgogICAgICAgKGZvcm1hdCAiJXM6IFxcYXR0
YWNoZmlsZXslc30iIGZuYW1lIGZuYW1lKSkpKSkpCg==">code uri</a>
<p>
Finally, we also modify the results of a code block so they will appear in a gray box and stand out from the text more clearly.
</p>
<div class="org-src-container">
<pre class="src src-emacs-lisp">(<span style="color: #0000FF;">defun</span> <span style="color: #006699;">my-results</span> (fixed-width contents info)
<span style="color: #036A07;">"Transform a results block to make it more visible."</span>
(<span style="color: #0000FF;">let</span> ((results (org-element-property <span style="color: #006FE0;">:results</span> fixed-width))
(format (elt (plist-get info <span style="color: #006FE0;">:back-end</span>) 2))
(value (org-element-property <span style="color: #006FE0;">:value</span> fixed-width)))
(<span style="color: #0000FF;">cond</span>
((eq 'latex format)
(format <span style="color: #008000;">"\\begin{tcolorbox}</span>
<span style="color: #008000;">\\begin{verbatim}</span>
<span style="color: #008000;">RESULTS: %s</span>
<span style="color: #008000;">\\end{verbatim}</span>
<span style="color: #008000;">\\end{tcolorbox}"</span>
value))
(t
(format <span style="color: #008000;">"&lt;pre&gt;RESULTS: %s&lt;/pre&gt;"</span> value)))))
</pre>
</div><a href="dafeb6b72e57a11595885a79d0ce2cbe.elisp">dafeb6b72e57a11595885a79d0ce2cbe.elisp</a> <a href="data:text/emacs-lisp;charset=US-ASCII;base64,KGRlZnVuIG15LXJlc3VsdHMgKGZpeGVkLXdpZHRoIGNvbnRlbnRzIGluZm8pCiAgIlRyYW5zZm9y
bSBhIHJlc3VsdHMgYmxvY2sgdG8gbWFrZSBpdCBtb3JlIHZpc2libGUuIgogIChsZXQgKChyZXN1
bHRzIChvcmctZWxlbWVudC1wcm9wZXJ0eSA6cmVzdWx0cyBmaXhlZC13aWR0aCkpCgkoZm9ybWF0
IChlbHQgKHBsaXN0LWdldCBpbmZvIDpiYWNrLWVuZCkgMikpCgkodmFsdWUgKG9yZy1lbGVtZW50
LXByb3BlcnR5IDp2YWx1ZSBmaXhlZC13aWR0aCkpKQogICAgKGNvbmQKICAgICAoKGVxICdsYXRl
eCBmb3JtYXQpCiAgICAgIChmb3JtYXQgIlxcYmVnaW57dGNvbG9yYm94fQpcXGJlZ2lue3ZlcmJh
dGltfQpSRVNVTFRTOiAlcwpcXGVuZHt2ZXJiYXRpbX0KXFxlbmR7dGNvbG9yYm94fSIKCSAgICAg
IHZhbHVlKSkKICAgICAodAogICAgICAoZm9ybWF0ICI8cHJlPlJFU1VMVFM6ICVzPC9wcmU+IiB2
YWx1ZSkpKSkpCg==">code uri</a>
<pre>RESULTS: my-results
</pre>
<p>
An author may also choose to embed a file into their document, using the attachfile package for LaTeX. Here, we leverage the ability of org-mode to create functional links that can be exported differently for LaTeX and HTML. We will create an attachfile link, and set it up to export as a LaTeX command or as a data URI for HTML.
</p>
<div class="org-src-container">
<pre class="src src-emacs-lisp" id="attachfile-link">(org-add-link-type
<span style="color: #008000;">"attachfile"</span>
(<span style="color: #0000FF;">lambda</span> (path) (org-open-file path))
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">formatting</span>
(<span style="color: #0000FF;">lambda</span> (path desc format)
(<span style="color: #0000FF;">cond</span>
((eq format 'html)
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">we want a data URI to the file name</span>
(<span style="color: #0000FF;">let*</span> ((content
(<span style="color: #0000FF;">with-temp-buffer</span>
(insert-file-contents path)
(buffer-string)))
(data-uri
(base64-encode-string
(encode-coding-string content 'utf-8))))
(add-to-list '*embedded-files* path)
(format (concat <span style="color: #008000;">"&lt;a href=\"data:;base64,"</span>
<span style="color: #008000;">"%s\"&gt;%s&lt;/a&gt;"</span>)
data-uri
path)))
((eq format 'latex)
<span style="color: #8D8D84;">;; </span><span style="color: #8D8D84; font-style: italic;">write out the latex command</span>
(add-to-list '*embedded-files* path)
(format <span style="color: #008000;">"\\attachfile{%s}"</span> path)))))
</pre>
</div><a href="attachfile-link.elisp">attachfile-link.elisp</a> <a href="data:text/emacs-lisp;charset=US-ASCII;base64,KG9yZy1hZGQtbGluay10eXBlCiAiYXR0YWNoZmlsZSIKIChsYW1iZGEgKHBhdGgpIChvcmctb3Bl
bi1maWxlIHBhdGgpKQogOzsgZm9ybWF0dGluZwogKGxhbWJkYSAocGF0aCBkZXNjIGZvcm1hdCkK
ICAgKGNvbmQKICAgICgoZXEgZm9ybWF0ICdodG1sKQogICAgIDs7IHdlIHdhbnQgYSBkYXRhIFVS
SSB0byB0aGUgZmlsZSBuYW1lCiAgICAgKGxldCogKChjb250ZW50CgkgICAgICh3aXRoLXRlbXAt
YnVmZmVyCgkgICAgICAgKGluc2VydC1maWxlLWNvbnRlbnRzIHBhdGgpCgkgICAgICAgKGJ1ZmZl
ci1zdHJpbmcpKSkKCSAgICAoZGF0YS11cmkKCSAgICAgKGJhc2U2NC1lbmNvZGUtc3RyaW5nCgkg
ICAgICAoZW5jb2RlLWNvZGluZy1zdHJpbmcgY29udGVudCAndXRmLTgpKSkpCiAgICAgICAoYWRk
LXRvLWxpc3QgJyplbWJlZGRlZC1maWxlcyogcGF0aCkKICAgICAgIChmb3JtYXQgKGNvbmNhdCAi
PGEgaHJlZj1cImRhdGE6O2Jhc2U2NCwiCiAgICAgICAgICAgICAgICAgICAgICAgIiVzXCI+JXM8
L2E+IikKCSAgICAgICBkYXRhLXVyaQoJICAgICAgIHBhdGgpKSkKICAgICgoZXEgZm9ybWF0ICds
YXRleCkKICAgICA7OyB3cml0ZSBvdXQgdGhlIGxhdGV4IGNvbW1hbmQKICAgICAoYWRkLXRvLWxp
c3QgJyplbWJlZGRlZC1maWxlcyogcGF0aCkKICAgICAoZm9ybWF0ICJcXGF0dGFjaGZpbGV7JXN9
IiBwYXRoKSkpKSkK">code uri</a>
<p>
Here, we define derived backends for HTML and LaTeX export. These are identical to the standard export backends, except for the modified behavior of the table, src-block, and fixed-width (results) elements.
</p>
<div class="org-src-container">
<pre class="src src-emacs-lisp">(org-export-define-derived-backend 'my-html 'html
<span style="color: #006FE0;">:translate-alist</span> '((table . my-table-format)
(src-block . my-src-block-format)
(fixed-width . my-results)))
(org-export-define-derived-backend 'my-latex 'latex
<span style="color: #006FE0;">:translate-alist</span> '((table . my-table-format)
(src-block . my-src-block-format)
(fixed-width . my-results)))
</pre>
</div><a href="2efb34a32a9c4653ff697c1d00fd294b.elisp">2efb34a32a9c4653ff697c1d00fd294b.elisp</a> <a href="data:text/emacs-lisp;charset=US-ASCII;base64,KG9yZy1leHBvcnQtZGVmaW5lLWRlcml2ZWQtYmFja2VuZCAnbXktaHRtbCAnaHRtbAogIDp0cmFu
c2xhdGUtYWxpc3QgJygodGFibGUgLiBteS10YWJsZS1mb3JtYXQpCgkJICAgICAoc3JjLWJsb2Nr
IC4gbXktc3JjLWJsb2NrLWZvcm1hdCkKCQkgICAgIChmaXhlZC13aWR0aCAuIG15LXJlc3VsdHMp
KSkKCihvcmctZXhwb3J0LWRlZmluZS1kZXJpdmVkLWJhY2tlbmQgJ215LWxhdGV4ICdsYXRleAog
IDp0cmFuc2xhdGUtYWxpc3QgJygodGFibGUgLiBteS10YWJsZS1mb3JtYXQpCiAgICAgICAgICAg
ICAgICAgICAgIChzcmMtYmxvY2sgLiBteS1zcmMtYmxvY2stZm9ybWF0KQoJCSAgICAgKGZpeGVk
LXdpZHRoIC4gbXktcmVzdWx0cykpKQo=">code uri</a>
</div>
<div id="outline-container-sec-4-2-1" class="outline-4">
<h4 id="sec-4-2-1"><span class="section-number-4">4.2.1</span> HTML export</h4>
<div class="outline-text-4" id="text-4-2-1">
<p>
Here we run the command to generate the exported HTML manuscript.
</p>
<div class="org-src-container">
<pre class="src src-emacs-lisp">(browse-url (org-export-to-file 'my-html <span style="color: #008000;">"manuscript.html"</span>))
</pre>
</div><a href="796806ad7fbc93b82453b6044f1738d4.elisp">796806ad7fbc93b82453b6044f1738d4.elisp</a> <a href="data:text/emacs-lisp;charset=US-ASCII;base64,KGJyb3dzZS11cmwgKG9yZy1leHBvcnQtdG8tZmlsZSAnbXktaHRtbCAibWFudXNjcmlwdC5odG1s
IikpCg==">code uri</a>
<pre>RESULTS: #&lt;process open manuscript.html&gt;
</pre>
</div>
</div>
<div id="outline-container-sec-4-2-2" class="outline-4">
<h4 id="sec-4-2-2"><span class="section-number-4">4.2.2</span> PDF export</h4>
<div class="outline-text-4" id="text-4-2-2">
<p>
Here we generate the LaTeX manuscript with the embedded files and info, and then convert it to PDF. After the PDF is created, we insert the new InfoField into the PDF. This export uses the derived exporter described above.
</p>
<div class="org-src-container">
<pre class="src src-emacs-lisp"><span style="color: #8D8D84;">; </span><span style="color: #8D8D84; font-style: italic;">Delete output files, ignoring errors if they do not exist</span>
(<span style="color: #0000FF;">ignore-errors</span>
(delete-file <span style="color: #008000;">"manuscript.tex"</span>)
(delete-file <span style="color: #008000;">"manuscript.pdf"</span>)
(delete-file <span style="color: #008000;">"manuscript-with-embedded-data.pdf"</span>))
<span style="color: #8D8D84;">; </span><span style="color: #8D8D84; font-style: italic;">Initialize embedded-files to an empty list.</span>
(<span style="color: #0000FF;">setq</span> *embedded-files* '())
(<span style="color: #0000FF;">let</span> ((org-latex-minted-options
(append
org-latex-minted-options
'((<span style="color: #008000;">"xleftmargin"</span> <span style="color: #008000;">"\\parindent"</span>)))))
(org-export-to-file 'my-latex <span style="color: #008000;">"manuscript.tex"</span>))
(ox-manuscript-latex-pdf-process <span style="color: #008000;">"manuscript.tex"</span>)
(shell-command <span style="color: #008000;">"pdftk manuscript.pdf dump_data > info.txt"</span>)