-
Notifications
You must be signed in to change notification settings - Fork 0
/
reference.bib
724 lines (668 loc) · 31.4 KB
/
reference.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
% IPDPSW 2016 workshop paper benchmarking Storm, Flink and Spark Streaming.
@inproceedings{benchmark_sce,
  author    = {Chintapalli, S. and Dagit, D. and Evans, B. and Farivar, R. and Graves, T. and Holderbaugh, M. and Liu, Z. and Nusbaum, K. and Patil, K. and Peng, B. J. and Poulosky, P.},
  title     = {Benchmarking Streaming Computation Engines: {Storm}, {Flink} and {Spark Streaming}},
  booktitle = {2016 {IEEE} International Parallel and Distributed Processing Symposium Workshops ({IPDPSW})},
  year      = {2016},
  pages     = {1789--1792},
  doi       = {10.1109/IPDPSW.2016.138}
}
% arXiv preprint on FunMap (dblp export; the dblp bookkeeping fields are kept).
@article{funmap,
  author        = {Jozashoori, Samaneh and
                   Chaves{-}Fraga, David and
                   Iglesias, Enrique and
                   Vidal, Maria{-}Esther and
                   Corcho, {\'{O}}scar},
  title         = {{FunMap}: Efficient Execution of Functional Mappings for Knowledge Graph Creation},
  journal       = {CoRR},
  volume        = {abs/2008.13482},
  year          = {2020},
  url           = {https://arxiv.org/abs/2008.13482},
  archivePrefix = {arXiv},
  eprint        = {2008.13482},
  timestamp     = {Wed, 16 Sep 2020 11:20:03 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-2008-13482.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Wilschut and Apers: dataflow query execution in a parallel main-memory environment.
% An inproceedings entry needs a booktitle; the proceedings named here are the ones
% the DOI prefix (PDIS.1991) identifies.
% NOTE(review): the DOI points to the 1991 proceedings while year/month say
% January 1992 -- confirm which date should be cited.
@inproceedings{symmetric_hash_join,
  author    = {Wilschut, A. N. and Apers, Peter},
  title     = {Dataflow query execution in a parallel main-memory environment},
  booktitle = {Proceedings of the First International Conference on Parallel and Distributed Information Systems},
  year      = {1992},
  month     = jan,
  pages     = {68--77},
  isbn      = {0-8186-2295-4},
  doi       = {10.1109/PDIS.1991.183069}
}
% MillWheel paper (cited for watermarks).
@inproceedings{watermark_millwheel,
  author    = {Akidau, Tyler and Balikov, Alex and Bekiroglu, Kaya and Chernyak, Slava and Haberman, Josh and Lax, Reuven and McVeety, Sam and Mills, Daniel and Nordstrom, Paul and Whittle, Sam},
  title     = {{MillWheel}: Fault-Tolerant Stream Processing at {Internet} Scale},
  booktitle = {Very Large Data Bases},
  year      = {2013},
  pages     = {734--746}
}
% GrubJoin: adaptive multi-way windowed stream join with CPU load shedding.
% The third author is written as "Yu, Philip S." so that "Yu" is parsed as the surname.
@article{grubjoin,
  author    = {Gedik, Bugra and Wu, Kun-Lung and Yu, Philip S. and Liu, Ling},
  title     = {{GrubJoin}: An adaptive, multi-way, windowed stream join with time correlation-aware {CPU} load shedding},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {19},
  number    = {10},
  pages     = {1363--1380},
  year      = {2007},
  publisher = {IEEE},
  doi       = {10.1109/TKDE.2007.190630}
}
% Function Ontology poster paper (ESWC 2016 Posters and Demos).
% Accented letters in the editor list are wrapped as BibTeX special characters
% ({\'c}, {\"o}) so they sort and abbreviate as single letters.
@inproceedings{fno_ben,
  author    = {De Meester, Ben and Dimou, Anastasia and Verborgh, Ruben and Mannens, Erik},
  title     = {An Ontology to Semantically Declare and Describe Functions},
  booktitle = {Proceedings of the 13th Extended Semantic Web Conference: Posters and Demos},
  editor    = {Sack, Harald and Rizzo, Giuseppe and Steinmetz, Nadine and Mladeni{\'c}, Dunja and Auer, S{\"o}ren and Lange, Christoph},
  series    = {Lecture Notes in Computer Science},
  volume    = 9989,
  publisher = {Springer},
  year      = 2016,
  month     = jun,
  pages     = {46--49},
  isbn      = {978-3-319-47602-5},
  doi       = {10.1007/978-3-319-47602-5_10},
  url       = {http://2016.eswc-conferences.org/sites/default/files/papers/Accepted%20Posters%20and%20Demos/ESWC2016_POSTER_An_Ontology_to_Semantically_Declare_Describe_Functions.pdf},
}
% VC-TWJoin: stream join with a variable update cycle time window (CSCWD 2018).
@inproceedings{vctw_join,
  author       = {Ji, Yimu and Liu, Shangdong and Lu, Lili and Lang, Xianbo and Yao, Haichang and Wang, Ruchuan},
  title        = {{VC-TWJoin}: A stream join algorithm based on variable update cycle time window},
  booktitle    = {2018 {IEEE} 22nd International Conference on Computer Supported Cooperative Work in Design ({CSCWD})},
  pages        = {178--183},
  year         = {2018},
  organization = {IEEE}
}
% Stream window join for tracking moving objects (SSDBM 2003).
% The year lives only in the year field; repeating it inside booktitle made styles print it twice.
@inproceedings{join_tracking,
  author       = {Hammad, Moustafa A. and Aref, Walid G. and Elmagarmid, Ahmed K.},
  title        = {Stream window join: Tracking moving objects in sensor-network databases},
  booktitle    = {15th International Conference on Scientific and Statistical Database Management},
  pages        = {75--84},
  year         = {2003},
  organization = {IEEE}
}
% Window join approximation with importance semantics (ACM CIKM, 15th edition).
% Venue capitalised like the other ACM proceedings titles in this file.
@inproceedings{approximate_window_sem,
  author    = {Ojewole, Adegoke and Zhu, Qiang and Hou, Wen-Chi},
  title     = {Window join approximation over data streams with importance semantics},
  booktitle = {Proceedings of the 15th {ACM} International Conference on Information and Knowledge Management},
  pages     = {112--121},
  year      = {2006}
}
% Approximate join processing over data streams (SIGMOD 2003).
% Venue capitalised like the other SIGMOD proceedings titles in this file.
@inproceedings{approx_window,
  author    = {Das, Abhinandan and Gehrke, Johannes and Riedewald, Mirek},
  title     = {Approximate join processing over data streams},
  booktitle = {Proceedings of the 2003 {ACM} {SIGMOD} International Conference on Management of Data},
  pages     = {40--51},
  year      = {2003}
}
% General stream slicing for window aggregation (EDBT 2019).
% No trailing period in the title: styles add their own punctuation.
@inproceedings{jonas_scotty,
  author    = {Traub, Jonas and Grulich, Philipp M. and Cu{\'e}llar, Alejandro Rodr{\'\i}guez and Bre{\ss}, Sebastian and Katsifodimos, Asterios and Rabl, Tilmann and Markl, Volker},
  title     = {Efficient Window Aggregation with General Stream Slicing},
  booktitle = {EDBT},
  pages     = {97--108},
  year      = {2019}
}
% Scotty: window aggregation for out-of-order stream processing (ICDE 2018).
% The third author is spelled with accents, matching the jonas_scotty entry.
@inproceedings{scotty,
  author       = {Traub, Jonas and Grulich, Philipp Marian and Cu{\'e}llar, Alejandro Rodr{\'\i}guez and Bre{\ss}, Sebastian and Katsifodimos, Asterios and Rabl, Tilmann and Markl, Volker},
  title        = {{Scotty}: Efficient window aggregation for out-of-order stream processing},
  booktitle    = {2018 {IEEE} 34th International Conference on Data Engineering ({ICDE})},
  pages        = {1300--1303},
  year         = {2018},
  organization = {IEEE}
}
% SPADE, the declarative stream processing engine of System S (SIGMOD 2008).
@inproceedings{spade_stream,
  author    = {Gedik, Bugra and Andrade, Henrique and Wu, Kun-Lung and Yu, Philip S. and Doo, Myungcheol},
  title     = {{SPADE}: The {System S} Declarative Stream Processing Engine},
  booktitle = {Proceedings of the 2008 {ACM} {SIGMOD} International Conference on Management of Data},
  series    = {SIGMOD '08},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Vancouver, Canada},
  year      = {2008},
  pages     = {1123--1134},
  numpages  = {12},
  isbn      = {9781605581026},
  doi       = {10.1145/1376616.1376729},
  url       = {https://doi.org/10.1145/1376616.1376729},
  keywords  = {distributed data stream processing}
}
% STREAM, the Stanford stream data manager (IEEE Data Eng. Bull., vol. 26).
@article{stream_standford,
  author  = {Arasu, Arvind and Babcock, Brian and Babu, Shivnath and Datar, Mayur and Ito, Keith and Motwani, Rajeev and Nishizawa, Itaru and Srivastava, Utkarsh and Thomas, Dilys and Varma, Rohit and Widom, Jennifer},
  title   = {{STREAM}: The {Stanford} stream data manager},
  journal = {IEEE Data Eng. Bull.},
  volume  = {26},
  pages   = {19--26},
  year    = {2003},
  month   = jan
}
% Generic windowing support for extensible stream processing systems.
% The empty pages field is omitted (no page range is recorded here); the breve
% in the given name is written as a LaTeX escape so 8-bit BibTeX can handle it.
@article{generic_window_sem,
  author  = {Gedik, Bu{\u{g}}ra},
  title   = {Generic windowing support for extensible stream processing systems},
  journal = {Software: Practice and Experience},
  volume  = {44},
  year    = {2014},
  month   = sep,
  doi     = {10.1002/spe.2194}
}
% arXiv preprint on lightweight asynchronous snapshots for distributed dataflows.
% Describes the same work as the entry keyed flink_fault_tolerance; both keys are
% kept because either may be cited.
@misc{asynchronous_barrier,
  author        = {Carbone, Paris and F{\'o}ra, Gyula and Ewen, Stephan and Haridi, Seif and Tzoumas, Kostas},
  title         = {Lightweight Asynchronous Snapshots for Distributed Dataflows},
  year          = {2015},
  eprint        = {1506.08603},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC}
}
% Apache Flink blog post introducing stream windows (cited for watermarks).
% The month is taken from the date embedded in the URL (2015/12/04).
@online{watermark_flink,
  author  = {{Apache Flink}},
  title   = {Introducing Stream Windows in {Apache Flink}},
  year    = 2015,
  month   = dec,
  url     = {https://flink.apache.org/news/2015/12/04/Introducing-windows.html},
  urldate = {2021-04-24}
}
% Storm at Twitter (SIGMOD 2014).
@inproceedings{storm_twitter,
  author    = {Toshniwal, Ankit and Taneja, Siddarth and Shukla, Amit and Ramasamy, Karthik and Patel, Jignesh M. and Kulkarni, Sanjeev and Jackson, Jason and Gade, Krishna and Fu, Maosong and Donham, Jake and Bhagat, Nikunj and Mittal, Sailesh and Ryaboy, Dmitriy},
  title     = {{Storm@twitter}},
  booktitle = {Proceedings of the 2014 {ACM} {SIGMOD} International Conference on Management of Data},
  series    = {SIGMOD '14},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Snowbird, Utah, USA},
  year      = {2014},
  pages     = {147--156},
  numpages  = {10},
  isbn      = {9781450323765},
  doi       = {10.1145/2588555.2595641},
  url       = {https://doi.org/10.1145/2588555.2595641},
  keywords  = {stream data management, real-time query processing}
}
% Jay Kreps' article questioning the Lambda Architecture (cited for the Kappa architecture).
% The publisher name uses a plain ASCII apostrophe so it is safe for 8-bit BibTeX.
@online{kappa_architecture,
  author    = {Kreps, Jay},
  title     = {Questioning the {Lambda Architecture}},
  year      = 2014,
  publisher = {O'Reilly Media, Inc.},
  url       = {https://www.oreilly.com/radar/questioning-the-lambda-architecture/},
  urldate   = {2021-04-23}
}
% Kafka: distributed messaging system for log processing (NetDB).
% NOTE(review): volume 11 looks like a mis-parsed "NetDB'11" from an automatic
% export rather than a real volume number -- confirm and drop if so.
@inproceedings{kafka,
  author    = {Kreps, Jay and Narkhede, Neha and Rao, Jun and others},
  title     = {{Kafka}: A distributed messaging system for log processing},
  booktitle = {Proceedings of the {NetDB}},
  volume    = {11},
  pages     = {1--7},
  year      = {2011}
}
% The Hadoop Distributed File System (MSST 2010).
@inproceedings{hadoop,
  author    = {Shvachko, Konstantin and Kuang, Hairong and Radia, Sanjay and Chansler, Robert},
  title     = {The {Hadoop Distributed File System}},
  booktitle = {2010 {IEEE} 26th Symposium on Mass Storage Systems and Technologies ({MSST})},
  year      = {2010},
  pages     = {1--10},
  doi       = {10.1109/MSST.2010.5496972}
}
% MapReduce paper (OSDI'04).
% NOTE(review): address holds the conference location, not a publisher city -- confirm
% this is how the chosen style should print it.
@inproceedings{mapreduce,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  booktitle = {{OSDI}'04: Sixth Symposium on Operating System Design and Implementation},
  year      = {2004},
  pages     = {137--150},
  address   = {San Francisco, CA}
}
% Marz and Warren's Big Data book (cited for the Lambda architecture).
% 328 is the book's length, so it is stored in pagetotal; a pages field on a book
% is printed as a cited page ("p. 328") by styles that honour it.
% NOTE(review): the subtitle ending was changed from "data analysis" to
% "data systems" -- confirm against the ISBN.
@book{lambda_arch_book,
  author    = {Marz, Nathan and Warren, James},
  title     = {Big Data: Principles and best practices of scalable realtime data systems},
  publisher = {Manning},
  year      = {2015},
  isbn      = {9781617290343},
  pagetotal = {328},
}
% Nathan Marz's blog post on beating the CAP theorem.
@online{lambda_arc_bpost,
  author  = {Marz, Nathan},
  title   = {How to beat the {CAP} theorem},
  year    = {2011},
  url     = {http://nathanmarz.com/blog/how-to-beat-the-cap-theorem.html},
  urldate = {2021-04-23},
}
% Lambda architecture for batch and speed big data processing.
% booktitle is required for inproceedings; the venue follows from the DOI prefix (BigData.2015).
@inproceedings{lambda_arch,
  author    = {Kiran, Mariam and Murphy, Peter and Monga, Inder and Dugan, Jon and Baveja, Sartaj},
  title     = {Lambda architecture for cost-effective batch and speed big data processing},
  booktitle = {2015 {IEEE} International Conference on Big Data (Big Data)},
  year      = {2015},
  month     = oct,
  pages     = {2785--2792},
  doi       = {10.1109/BigData.2015.7364082}
}
% Book on big data analytics beyond Hadoop.
% NOTE(review): author surname corrected from "Agneeswaram" to "Agneeswaran" -- confirm against the book.
@book{batch_duration,
  author    = {Agneeswaran, Vijay Srinivas},
  title     = {Big Data Analytics Beyond {Hadoop}: Real-Time Applications with {Storm}, {Spark}, and More {Hadoop} Alternatives},
  publisher = {Pearson FT Press},
  address   = {Upper Saddle River, NJ, USA},
  year      = {2014}
}
% Batch processing: definition and event log identification (SIMPDA 2015).
@inproceedings{batch_processing,
  author    = "Niels Martin and Marijke Swennen and B. Depaire and Mieke Jans and A. Caris and K. Vanhoof",
  title     = "Batch Processing: Definition and Event Log Identification",
  year      = 2015,
  booktitle = "SIMPDA"
}
% SEQUEL 2 paper (cited for SQL).
@article{sql,
  author  = {Chamberlin, Donald and Astrahan, Morton and Eswaran, Kapali and Griffiths, Patricia and Lorie, Raymond and Mehl, James and Reisner, Phyllis and Wade, Bradford},
  title   = {{SEQUEL} 2: A Unified Approach to Data Definition, Manipulation, and Control},
  journal = {IBM Journal of Research and Development},
  volume  = {20},
  pages   = {560--575},
  year    = {1976},
  month   = nov,
  doi     = {10.1147/rd.206.0560}
}
% How closely a PC clock can track UTC over the Internet.
% The page numbers carry an "L" prefix, so the range is L17--L21.
@article{ntp_latency,
  author  = {Caporaloni, M. and Ambrosini, R.},
  title   = {How closely can a personal computer clock track the {UTC} timescale via the {Internet}?},
  journal = {European Journal of Physics},
  volume  = {23},
  number  = {4},
  pages   = {L17--L21},
  year    = {2002}
}
% Benchmarking distributed stream data processing systems (ICDE 2018).
% This is a conference paper, so it is an inproceedings entry with the proceedings
% in booktitle (same venue string as the scotty entry).
@inproceedings{benchmark_dsdps,
  author    = {Karimov, Jeyhun and Rabl, T. and Katsifodimos, Asterios and Samarev, R. and Heiskanen, H. and Markl, V.},
  title     = {Benchmarking Distributed Stream Data Processing Systems},
  booktitle = {2018 {IEEE} 34th International Conference on Data Engineering ({ICDE})},
  year      = {2018},
  pages     = {1507--1518}
}
% "The Semantic Web" article in Scientific American (BibSonomy export).
% The BibSonomy bookkeeping fields are unknown to styles and kept as annotations.
@article{bernerslee2001semantic,
  author           = {Berners-Lee, Tim and Hendler, James and Lassila, Ora},
  title            = {The {Semantic Web}},
  journal          = {Scientific American},
  volume           = 284,
  number           = 5,
  pages            = {34--43},
  year             = 2001,
  month            = may,
  url              = {http://www.sciam.com/article.cfm?articleID=00048144-10D2-1C70-84A9809EC588EF21},
  abstract         = {A new form of Web content that is meaningful to computers will unleash a revolution of new possibilities.},
  added-at         = {2016-09-07T01:44:07.000+0200},
  biburl           = {https://www.bibsonomy.org/bibtex/2222934145a71a9d6cfbbb375d4d62c1d/nosebrain},
  interhash        = {e87f09446138a81e6478625da97885b6},
  intrahash        = {222934145a71a9d6cfbbb375d4d62c1d},
  lastdatemodified = {2007-04-27},
  lastname         = {Berners-Lee},
  own              = {notown},
  read             = {notread},
  timestamp        = {2016-09-07T01:44:07.000+0200}
}
% TDWI Best Practices Report on big data analytics.
% This is a report series, not a journal: the series name goes in type, the issuing
% body in institution, and the quarter in number.
@techreport{big_data_analytics,
  author      = {Russom, Philip},
  title       = {Big Data Analytics},
  type        = {{TDWI} Best Practices Report},
  number      = {Fourth Quarter},
  institution = {{TDWI} Research},
  year        = {2011}
}
% Chapter on data stream management systems (Dagstuhl Follow-Ups, vol. 5).
% The title is single-braced so the bibliography style controls its casing.
@incollection{data_stream_management,
  author    = {Geisler, Sandra},
  title     = {Data Stream Management Systems},
  booktitle = {Data Exchange, Integration, and Streams},
  editor    = {Kolaitis, Phokion G. and Lenzerini, Maurizio and Schweikardt, Nicole},
  series    = {Dagstuhl Follow-Ups},
  volume    = {5},
  pages     = {275--304},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2013},
  isbn      = {978-3-939897-61-3},
  issn      = {1868-8977},
  doi       = {10.4230/DFU.Vol5.10452.275},
  url       = {http://drops.dagstuhl.de/opus/volltexte/2013/4297},
  urn       = {urn:nbn:de:0030-drops-42975},
  annote    = {Keywords: Data Streams, Data Stream Management, Data Quality, Query Languages}
}
% Evaluation of stream processing frameworks (IEEE TPDS, 2020).
% Compound surnames stay braced so they are treated as one indivisible last name.
@article{evalution_of_spe,
  author  = {{van Dongen}, G. and {Van den Poel}, D.},
  title   = {Evaluation of Stream Processing Frameworks},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {31},
  number  = {8},
  pages   = {1845--1858},
  year    = {2020},
  doi     = {10.1109/TPDS.2020.2978480}
}
% Facebook Linked Data via the Graph API (Semantic Web journal, vol. 4).
@article{facebook_linked_data,
  author  = {Weaver, Jesse and Tarjan, P.},
  title   = {{Facebook Linked Data} via the {Graph API}},
  journal = {Semantic Web},
  volume  = {4},
  pages   = {245--250},
  year    = {2013}
}
% Comparison of distributed online stream processing systems on fault tolerance.
% The issue number is taken from the DOI (jetwi.6.2 = volume 6, issue 2).
@article{fault_tolerance_dsms,
  author  = {Gradvohl, Andre and Senger, Hermes and Arantes, Luciana and Sens, Pierre},
  title   = {Comparing Distributed Online Stream Processing Systems Considering Fault Tolerance Issues},
  journal = {Journal of Emerging Technologies in Web Intelligence},
  volume  = {6},
  number  = {2},
  pages   = {174--179},
  year    = {2014},
  month   = may,
  doi     = {10.4304/jetwi.6.2.174-179}
}
% Apache Flink: stream and batch processing in a single engine.
% The trademark sign is written as \texttrademark so 8-bit BibTeX can process the title.
@article{flink,
  author  = {Carbone, P. and Katsifodimos, Asterios and Ewen, Stephan and Markl, V. and Haridi, Seif and Tzoumas, Kostas},
  title   = {{Apache Flink\texttrademark}: Stream and Batch Processing in a Single Engine},
  journal = {IEEE Data Eng. Bull.},
  volume  = {38},
  pages   = {28--38},
  year    = {2015}
}
% arXiv preprint on lightweight asynchronous snapshots for distributed dataflows.
% Describes the same work as the entry keyed asynchronous_barrier; both keys are
% kept because either may be cited.
@misc{flink_fault_tolerance,
  author        = {Carbone, Paris and F{\'o}ra, Gyula and Ewen, Stephan and Haridi, Seif and Tzoumas, Kostas},
  title         = {Lightweight Asynchronous Snapshots for Distributed Dataflows},
  year          = {2015},
  eprint        = {1506.08603},
  archiveprefix = {arXiv},
  primaryclass  = {cs.DC}
}
% Survey of issues in data stream management (SIGMOD Record 32(2)).
% The journal name is spelled out, matching the requirements_dsp entry.
@article{golab_data_stream,
  author     = {Golab, Lukasz and {\"O}zsu, M. Tamer},
  title      = {Issues in Data Stream Management},
  journal    = {{SIGMOD} Record},
  volume     = {32},
  number     = {2},
  pages      = {5--14},
  numpages   = {10},
  year       = {2003},
  month      = jun,
  issue_date = {June 2003},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0163-5808},
  doi        = {10.1145/776985.776986},
  url        = {https://doi.org/10.1145/776985.776986},
  abstract   = {Traditional databases store sets of relatively static records with no pre-defined notion of time, unless timestamp attributes are explicitly added. While this model adequately represents commercial catalogues or repositories of personal information, many current and emerging applications require support for on-line analysis of rapidly changing data streams. Limitations of traditional DBMSs in supporting streaming applications have been recognized, prompting research to augment existing technologies and build new systems to manage streaming data. The purpose of this paper is to review recent work in data stream management systems, with an emphasis on application requirements, data models, continuous query languages, and query evaluation.}
}
% Google blog post introducing the Knowledge Graph.
% The month field was misspelled as "mont" and therefore silently ignored.
@online{google_kg,
  author  = {Singhal, Amit},
  title   = {Introducing the {Knowledge Graph}: things, not strings},
  year    = {2012},
  month   = may,
  url     = {https://blog.google/products/search/introducing-knowledge-graph-things-not/},
  urldate = {2020-12-25}
}
% The Graph of Things: live knowledge graph of connected things.
% NOTE(review): the author names look like an automatic export and may have
% given and family names split incorrectly -- confirm against the article.
@article{graph_of_things,
  author  = {Phuoc, Danh and Nguyen Mau Quoc, Hoan and Ngo, Hung and Nhat, Tuan and Hauswirth, Manfred},
  title   = {The {Graph of Things}: A step towards the {Live Knowledge Graph} of connected things},
  journal = {Web Semantics: Science, Services and Agents on the World Wide Web},
  volume  = {37},
  pages   = {25--35},
  year    = {2016},
  month   = mar,
  doi     = {10.1016/j.websem.2016.02.003}
}
% arXiv preprint on fault tolerance for stream processing engines.
% NOTE(review): the eprint identifier starts with 1605 while the year is 2020 --
% confirm which year should be cited.
@misc{upstream_backup,
  author        = "Muhammad Anis Uddin Nasir",
  title         = "Fault Tolerance for Stream Processing Engines",
  eprint        = "1605.00928",
  archivePrefix = "arXiv",
  primaryClass  = "cs.DC",
  year          = 2020
}
% Chandy and Lamport's distributed snapshots paper.
% Quotation marks in the abstract use LaTeX quotes instead of Unicode curly quotes.
@article{chandy_lamport,
  author     = {Chandy, K. Mani and Lamport, Leslie},
  title      = {Distributed Snapshots: Determining Global States of Distributed Systems},
  journal    = {ACM Trans. Comput. Syst.},
  volume     = {3},
  number     = {1},
  pages      = {63--75},
  numpages   = {13},
  year       = {1985},
  month      = feb,
  issue_date = {Feb. 1985},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0734-2071},
  doi        = {10.1145/214451.214456},
  url        = {https://doi.org/10.1145/214451.214456},
  abstract   = {This paper presents an algorithm by which a process in a distributed system determines a global state of the system during a computation. Many problems in distributed systems can be cast in terms of the problem of detecting global states. For instance, the global state detection algorithm helps to solve an important class of problems: stable property detection. A stable property is one that persists: once a stable property becomes true it remains true thereafter. Examples of stable properties are ``computation has terminated,'' ``the system is deadlocked'' and ``all tokens in a token ring have disappeared.'' The stable property detection problem is that of devising algorithms to detect a given stable property. Global state detection can also be used for checkpointing.}
}
% Philips patient-monitoring product page (undated web resource).
% The company name is spelled as in the URL. No year is given because the source
% records none; biblatex author-year styles print a "no date" placeholder themselves.
% NOTE(review): add a urldate once the access date is known.
@online{hospital_data_monitoring,
  author       = {{Philips}},
  title        = {{Philips} Patient Monitoring},
  organization = {Philips},
  url          = {https://www.usa.philips.com/healthcare/solutions/patient-monitoring},
  bibsource    = {https://www.usa.philips.com/healthcare/solutions/patient-monitoring}
}
% Survey on diabetes diagnosis with machine learning techniques (Springer export).
@inproceedings{hospital_diagnosis,
  author    = {Choudhury, Ambika and Gupta, Deepak},
  editor    = {Kalita, Jugal and Balas, Valentina Emilia and Borah, Samarjeet and Pradhan, Ratika},
  title     = {A Survey on Medical Diagnosis of Diabetes Using Machine Learning Techniques},
  booktitle = {Recent Developments in Machine Learning and Data Analytics},
  publisher = {Springer Singapore},
  address   = {Singapore},
  year      = 2019,
  pages     = {67--78},
  isbn      = {978-981-13-1280-9},
  abstract  = {While designing medical diagnosis software, disease prediction is said to be one of the captious tasks. The techniques of machine learning have been successfully employed in assorted applications including medical diagnosis. By developing classifier system, machine learning algorithm may immensely help to solve the health-related issues which can assist the physicians to predict and diagnose diseases at an early stage. We can ameliorate the speed, performance, reliability, and accuracy of diagnosing on the current system for a specific disease by using the machine learning classification algorithms. This paper mainly targets the review of diabetes disease detection using the techniques of machine learning. Further, PIMA Indian Diabetic dataset is employed in machine learning techniques like artificial neural networks, decision tree, random forest, na{\"i}ve Bayes, k-nearest neighbors, support vector machines, and logistic regression and discussed the results with their pros and cons.}
}
% Introduction to the Resource Description Framework.
% The doi field holds the bare DOI (styles add the resolver). The publisher's PDF
% link is kept in pdfurl, a field styles ignore, because eprint is reserved for
% preprint-archive identifiers.
@article{intro_rdf,
  author  = {Miller, Eric},
  title   = {An Introduction to the {Resource Description Framework}},
  journal = {Bulletin of the American Society for Information Science and Technology},
  volume  = {25},
  number  = {1},
  pages   = {15--19},
  year    = {1998},
  doi     = {10.1002/bult.105},
  url     = {https://asistdl.onlinelibrary.wiley.com/doi/abs/10.1002/bult.105},
  pdfurl  = {https://asistdl.onlinelibrary.wiley.com/doi/pdf/10.1002/bult.105}
}
% W3C Recommendation: JSON-LD 1.1.
% The specification URL is stored in url (as in the rml_tech entry) instead of note,
% so URL-aware styles can typeset and link it.
@techreport{JSON-LD,
  author      = {Champin, Pierre-Antoine and Longley, Dave and Kellogg, Gregg},
  title       = {{JSON-LD} 1.1},
  type        = {{W3C} Recommendation},
  institution = {W3C},
  year        = {2020},
  month       = jul,
  url         = {https://www.w3.org/TR/2020/REC-json-ld11-20200716/},
  bibsource   = {https://w2.syronex.com/jmr/w3c-biblio}
}
% Latency measurement of fine-grained operations in stream processing benchmarks
% (BigData Congress 2018).
% Compound surnames stay braced so they are treated as one indivisible last name.
@inproceedings{latency_measurement_kafka,
  author    = {{van Dongen}, G. and Steurtewagen, B. and {Van den Poel}, D.},
  title     = {Latency Measurement of Fine-Grained Operations in Benchmarking Distributed Stream Processing Frameworks},
  booktitle = {2018 {IEEE} International Congress on Big Data ({BigData} Congress)},
  year      = {2018},
  month     = jul,
  pages     = {247--250},
  doi       = {10.1109/BigDataCongress.2018.00043},
  abstract  = {This paper describes a benchmark for stream processing frameworks allowing accurate latency benchmarking of fine-grained individual stages of a processing pipeline. By determining the latency of distinct common operations in the processing flow instead of the end-to-end latency, we can form guidelines for efficient processing pipeline design. Additionally, we address the issue of defining time in distributed systems by capturing time on one machine and defining the baseline latency. We validate our benchmark for Apache Flink using a processing pipeline comprising common stream processing operations. Our results show that joins are the most time consuming operation in our processing pipeline. The latency incurred by adding a join operation is 4.5 times higher than for a parsing operation, and the latency gradually becomes more dispersed after adding additional stages.},
  keywords  = {distributed processing;pipeline processing;latency measurement;fine-grained operations;distributed stream processing;stream processing frameworks;processing flow;end-to-end latency;defining time;baseline latency;common stream processing operations;parsing operation;processing pipeline design;Apache Flink;Benchmark testing;Pipelines;Task analysis;Time measurement;Message systems;Storms;big data applications;distributed stream computing;benchmark;Flink;Kafka}
}
% TurboStream: low-latency data stream processing (ICDCS 2018).
@inproceedings{low_latency_data_stream,
  author    = {Wu, S. and Liu, M. and Ibrahim, S. and Jin, H. and Gu, L. and Chen, F. and Liu, Z.},
  title     = {{TurboStream}: Towards Low-Latency Data Stream Processing},
  booktitle = {2018 {IEEE} 38th International Conference on Distributed Computing Systems ({ICDCS})},
  year      = {2018},
  pages     = {983--993},
  doi       = {10.1109/ICDCS.2018.00099}
}
% W3C Recommendation: RDF 1.1 N-Triples.
% The specification URL is stored in url (as in the rml_tech entry) instead of note,
% so URL-aware styles can typeset and link it.
@techreport{N-Triples,
  author      = {Seaborne, Andy and Carothers, Gavin},
  title       = {{RDF} 1.1 {N-Triples}},
  type        = {{W3C} Recommendation},
  institution = {W3C},
  year        = {2014},
  month       = feb,
  url         = {https://www.w3.org/TR/2014/REC-n-triples-20140225/},
  bibsource   = {https://w2.syronex.com/jmr/w3c-biblio}
}
% Operator-aware approach for processing RDF streams (Journal of Web Semantics).
% The empty pages field is omitted; no page range is recorded here.
@article{operator_aware_window,
  author  = {Phuoc, Danh},
  title   = {Operator-aware Approach for Boosting Performance in Processing {RDF} streams},
  journal = {Journal of Web Semantics},
  year    = {2016},
  month   = apr,
  doi     = {10.1016/j.websem.2016.04.001}
}
% W3C Recommendation: R2RML, the RDB to RDF mapping language.
% The specification URL is stored in url (as in the rml_tech entry) instead of note,
% so URL-aware styles can typeset and link it.
@techreport{r2rml,
  author      = {Cyganiak, Richard and Das, Souripriya and Sundara, Seema},
  title       = {{R2RML}: {RDB} to {RDF} Mapping Language},
  type        = {{W3C} Recommendation},
  institution = {W3C},
  year        = {2012},
  month       = sep,
  url         = {https://www.w3.org/TR/2012/REC-r2rml-20120927/},
  bibsource   = {https://w2.syronex.com/jmr/w3c-biblio}
}
% W3C Recommendation: RDF 1.1 Concepts and Abstract Syntax.
% The specification URL is stored in url (as in the rml_tech entry) instead of note,
% so URL-aware styles can typeset and link it.
@techreport{rdf_concepts,
  author      = {Lanthaler, Markus and Wood, David and Cyganiak, Richard},
  title       = {{RDF} 1.1 Concepts and Abstract Syntax},
  type        = {{W3C} Recommendation},
  institution = {W3C},
  year        = {2014},
  month       = feb,
  url         = {https://www.w3.org/TR/2014/REC-rdf11-concepts-20140225/},
  bibsource   = {https://w2.syronex.com/jmr/w3c-biblio}
}
% RDF-Gen: generating RDF from streaming and archival data (WIMS '18).
% The proceedings title belongs in booktitle (required for inproceedings); the
% "WIMS '18" prefix of the former journal value is kept as the series.
@inproceedings{rdf_gen,
  author    = {Santipantakis, Georgios and Kotis, Konstantinos and Vouros, George and Doulkeridis, Christos},
  title     = {{RDF-Gen}: Generating {RDF} from Streaming and Archival Data},
  booktitle = {Proceedings of the 8th International Conference on Web Intelligence, Mining and Semantics},
  series    = {WIMS '18},
  year      = {2018},
  month     = jun,
  pages     = {1--10},
  isbn      = {978-1-4503-5489-9},
  doi       = {10.1145/3227609.3227658}
}
% The 8 requirements of real-time stream processing (SIGMOD Record, vol. 34).
@article{requirements_dsp,
  author  = {Stonebraker, Michael and Cetintemel, Ugur and Zdonik, Stan},
  title   = {The 8 requirements of real-time stream processing},
  journal = {SIGMOD Record},
  volume  = {34},
  pages   = {42--47},
  year    = {2005},
  month   = dec,
  doi     = {10.1145/1107499.1107504}
}
% RML: generic language for integrated RDF mappings of heterogeneous data (LDOW 2014).
% Author names are spelled as in the other entries of this file (fno_ben,
% rml_streamer, taelman_iswc_resources_comunica_2018); "M. V. Sande" was a mangled
% form of the compound surname "Vander Sande".
% NOTE(review): "Colpaert, Pieter" and "Van de Walle, Rik" are expanded from the
% initials "P. Colpaert" and "R. Walle" -- confirm against the paper.
@inproceedings{rml,
  author    = {Dimou, Anastasia and Vander Sande, Miel and Colpaert, Pieter and Verborgh, Ruben and Mannens, Erik and {Van de Walle}, Rik},
  title     = {{RML}: A Generic Language for Integrated {RDF} Mappings of Heterogeneous Data},
  booktitle = {LDOW},
  year      = {2014}
}
% Parallel RDF generation from heterogeneous big data (cited for the RMLStreamer).
% NOTE(review): booktitle and series were missing (booktitle is required for
% inproceedings) and were filled in from memory of the DOI record -- confirm.
@inproceedings{rml_streamer,
  author    = {Haesendonck, Gerald and Maroy, Wouter and Heyvaert, Pieter and Verborgh, Ruben and Dimou, Anastasia},
  title     = {Parallel {RDF} generation from heterogeneous big data},
  booktitle = {Proceedings of the International Workshop on Semantic Big Data},
  series    = {SBD '19},
  year      = {2019},
  month     = jul,
  pages     = {1--6},
  doi       = {10.1145/3323878.3325802}
}
% RML specification document (IDLab).
% Names use "Last, First" form so the compound surnames "Vander Sande" and
% "De Meester" are not split; "Pieter" matches the rml_streamer entry.
@techreport{rml_tech,
  author      = {Dimou, Anastasia and
                 Vander Sande, Miel and
                 De Meester, Ben and
                 Heyvaert, Pieter and
                 Delva, Thomas},
  title       = {{RDF} Mapping Language ({RML})},
  type        = {Specification document},
  institution = {IDLab - imec - Ghent University},
  year        = {2020},
  month       = oct,
  url         = {https://rml.io/specs/rml/},
  bibsource   = {https://rml.io/specs/rml/}
}
% Semantic Web Stack figure from Wikimedia Commons.
% urldate (ISO format) replaces the unrecognised "urlseen" field, reading 29-12-20
% as 29 December 2020. \texttt replaces \ttfamily, which is a font switch and takes
% no argument. The corporate author is double-braced so it is not split into
% given and family name.
@online{sem_web_stack,
  author  = {{Wikimedia Commons}},
  title   = {Semantic Web Stack},
  year    = {2014},
  url     = {https://commons.wikimedia.org/wiki/File:Semantic_web_stack.svg},
  urldate = {2020-12-29},
  note    = {File: \texttt{Semantic Web Stack.svg}}
}
% Discretized Streams paper (cited for Spark Streaming), SOSP '13.
@inproceedings{spark_streaming,
  author    = {Zaharia, Matei and Das, Tathagata and Li, Haoyuan and Hunter, Timothy and Shenker, Scott and Stoica, Ion},
  title     = {Discretized Streams: Fault-Tolerant Streaming Computation at Scale},
  booktitle = {Proceedings of the Twenty-Fourth {ACM} Symposium on Operating Systems Principles},
  series    = {SOSP '13},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Farmington, Pennsylvania},
  year      = {2013},
  pages     = {423--438},
  numpages  = {16},
  isbn      = {9781450323888},
  doi       = {10.1145/2517349.2522737},
  url       = {https://doi.org/10.1145/2517349.2522737},
  abstract  = {Many "big data" applications must act on data in real time. Running these applications at ever-larger scales requires parallel platforms that automatically handle faults and stragglers. Unfortunately, current distributed stream processing models provide fault recovery in an expensive manner, requiring hot replication or long recovery times, and do not handle stragglers. We propose a new processing model, discretized streams (D-Streams), that overcomes these challenges. D-Streams enable a parallel recovery mechanism that improves efficiency over traditional replication and backup schemes, and tolerates stragglers. We show that they support a rich set of operators while attaining high per-node throughput similar to single-node systems, linear scaling to 100 nodes, sub-second latency, and sub-second fault recovery. Finally, D-Streams can easily be composed with batch and interactive query models like MapReduce, enabling rich applications that combine these modes. We implement D-Streams in a system called Spark Streaming.}
}
% W3C Recommendation: SPARQL Query Language for RDF.
% The specification URL is stored in url (as in the rml_tech entry) instead of note,
% so URL-aware styles can typeset and link it.
@techreport{sparql,
  author      = {Seaborne, Andy and Prud'hommeaux, Eric},
  title       = {{SPARQL} Query Language for {RDF}},
  type        = {{W3C} Recommendation},
  institution = {W3C},
  year        = {2008},
  month       = jan,
  url         = {https://www.w3.org/TR/2008/REC-rdf-sparql-query-20080115/},
  bibsource   = {https://w2.syronex.com/jmr/w3c-biblio}
}
% SPARQL-Generate: SPARQL extension for generating RDF from heterogeneous formats.
% Acronyms in the title are braced so sentence-casing styles keep them upper-case.
@inproceedings{sparql_generate,
  author    = {Lefran{\c{c}}ois, Maxime
               and Zimmermann, Antoine
               and Bakerally, Noorani},
  editor    = {Blomqvist, Eva
               and Maynard, Diana
               and Gangemi, Aldo
               and Hoekstra, Rinke
               and Hitzler, Pascal
               and Hartig, Olaf},
  title     = {A {SPARQL} Extension for Generating {RDF} from Heterogeneous Formats},
  booktitle = {The Semantic Web},
  year      = {2017},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {35--50},
  abstract  = {RDF aims at being the universal abstract data model for structured data on the Web. While there is effort to convert data in RDF, the vast majority of data available on the Web does not conform to RDF. Indeed, exposing data in RDF, either natively or through wrappers, can be very costly. Furthermore, in the emerging Web of Things, resource constraints of devices prevent from processing RDF graphs. Hence one cannot expect that all the data on the Web be available as RDF anytime soon. Several tools can generate RDF from non-RDF data, and transformation or mapping languages have been designed to offer more flexible solutions (GRDDL, XSPARQL, R2RML, RML, CSVW, etc.). In this paper, we introduce a new language, SPARQL-Generate, that generates RDF from: (i) a RDF Dataset, and (ii) a set of documents in arbitrary formats. As SPARQL-Generate is designed as an extension of SPARQL 1.1, it can provably: (i) be implemented on top on any existing SPARQL engine, and (ii) leverage the SPARQL extension mechanism to deal with an open set of formats. Furthermore, we show evidence that (iii) it can be easily learned by knowledge engineers that know SPARQL 1.1, and (iv) our first naive open source implementation performs better than the reference implementation of RML for big transformations.},
  isbn      = {978-3-319-58068-5}
}
% Comunica: modular SPARQL query engine for the Web (ISWC 2018).
@inproceedings{taelman_iswc_resources_comunica_2018,
  author    = {Taelman, Ruben and Van Herwegen, Joachim and Vander Sande, Miel and Verborgh, Ruben},
  title     = {{Comunica}: a Modular {SPARQL} Query Engine for the {Web}},
  booktitle = {Proceedings of the 17th International Semantic Web Conference},
  year      = {2018},
  month     = oct,
  url       = {https://comunica.github.io/Article-ISWC2018-Resource/}
}
% TripleWave: spreading RDF streams on the Web.
% NOTE(review): booktitle was missing (required for inproceedings) and was filled
% in from memory of the DOI record (ISWC 2016 proceedings) -- confirm.
@inproceedings{triple_wave,
  author    = {Mauri, Andrea and Calbimonte, Jean-Paul and Dell'Aglio, Daniele and Balduini, Marco and Brambilla, Marco and Della Valle, Emanuele and Aberer, Karl},
  title     = {{TripleWave}: Spreading {RDF} Streams on the {Web}},
  booktitle = {The Semantic Web -- {ISWC} 2016},
  year      = {2016},
  month     = oct,
  pages     = {140--149},
  isbn      = {978-3-319-46546-3},
  doi       = {10.1007/978-3-319-46547-0_15}
}
% W3C Recommendation: RDF 1.1 Turtle.
% The specification URL is stored in url (as in the rml_tech entry) instead of note,
% so URL-aware styles can typeset and link it.
@techreport{turtle_syntax,
  author      = {Prud'hommeaux, Eric and Carothers, Gavin},
  title       = {{RDF} 1.1 {Turtle}},
  type        = {{W3C} Recommendation},
  institution = {W3C},
  year        = {2014},
  month       = feb,
  url         = {https://www.w3.org/TR/2014/REC-turtle-20140225/},
  bibsource   = {https://w2.syronex.com/jmr/w3c-biblio}
}