@misc{cromwell,
doi = {10.7490/f1000research.1114634.1},
author = {Voss, Kate and Van der Auwera, Geraldine and Gentry, Jeff},
title = {Full-stack genomics pipelining with GATK4 + WDL + Cromwell},
publisher = {F1000Research},
year = {2017}
}
@Article{debian-med,
author="M{\"o}ller, Steffen
and Prescott, Stuart W.
and Wirzenius, Lars
and Reinholdtsen, Petter
and Chapman, Brad
and Prins, Pjotr
and Soiland-Reyes, Stian
and Kl{\"o}tzl, Fabian
and Bagnacani, Andrea
and Kala{\v{s}}, Mat{\'u}{\v{s}}
and Tille, Andreas
and Crusoe, Michael R.",
title="Robust Cross-Platform Workflows: How Technical and Scientific Communities Collaborate to Develop, Test and Share Best Practices for Data Analysis",
journal="Data Science and Engineering",
year="2017",
month="Sep",
day="01",
volume="2",
number="3",
pages="232--244",
abstract="Information integration and workflow technologies for data analysis have always been major fields of investigation in bioinformatics. A range of popular workflow suites are available to support analyses in computational biology. Commercial providers tend to offer prepared applications remote to their clients. However, for most academic environments with local expertise, novel data collection techniques or novel data analysis, it is essential to have all the flexibility of open-source tools and open-source workflow descriptions. Workflows in data-driven science such as computational biology have considerably gained in complexity. New tools or new releases with additional features arrive at an enormous pace, and new reference data or concepts for quality control are emerging. A well-abstracted workflow and the exchange of the same across work groups have an enormous impact on the efficiency of research and the further development of the field. High-throughput sequencing adds to the avalanche of data available in the field; efficient computation and, in particular, parallel execution motivate the transition from traditional scripts and Makefiles to workflows. We here review the extant software development and distribution model with a focus on the role of integration testing and discuss the effect of common workflow language on distributions of open-source scientific software to swiftly and reliably provide the tools demanded for the execution of such formally described workflows. It is contended that, alleviated from technical differences for the execution on local machines, clusters or the cloud, communities also gain the technical means to test workflow-driven interaction across several software packages.",
issn="2364-1541",
doi="10.1007/s41019-017-0050-4"
}
@article{Gruening2018,
doi = {10.12688/f1000research.15140.1},
url = {https://doi.org/10.12688/f1000research.15140.1},
year = {2018},
month = {jun},
publisher = {F1000 Research, Ltd.},
volume = {7},
pages = {742},
author = {Bj\"{o}rn Gr\"{u}ning and Olivier Sallou and Pablo Moreno and Felipe da Veiga Leprevost and Herv{\'{e}} M{\'{e}}nager and Dan S{\o}ndergaard and Hannes R\"{o}st and Timo Sachsenberg and Brian O{\textquotesingle}Connor and F{\'{a}}bio Madeira and Victoria Dominguez Del Angel and Michael R. Crusoe and Susheel Varma and Daniel Blankenberg and Rafael C. Jimenez and Yasset Perez-Riverol},
title = {Recommendations for the packaging and containerizing of bioinformatics software},
journal = {F1000Research}
}
@article{Casati1998,
doi = {10.1016/s0169-023x(97)00033-5},
year = {1998},
month = {jan},
publisher = {Elsevier {BV}},
volume = {24},
number = {3},
pages = {211--238},
author = {F Casati and S Ceri and B Pernici and G Pozzi},
title = {Workflow evolution},
journal = {Data {\&} Knowledge Engineering}
}
@article{docker,
author = {Merkel, Dirk},
title = {Docker: Lightweight Linux Containers for Consistent Development and Deployment},
journal = {Linux Journal},
issue_date = {March 2014},
volume = {2014},
number = {239},
month = mar,
year = {2014},
issn = {1075-3583},
articleno = {2},
url = {https://www.linuxjournal.com/node/1335702},
urldate = {2018-11-29},
acmid = {2600241},
publisher = {Belltown Media},
address = {Houston, TX}
}
@misc{cwl,
doi = {10.6084/m9.figshare.3115156.v2},
author = {Amstutz, Peter and Crusoe, Michael R. and {Nebojša Tijanić} and Chapman, Brad and Chilton, John and Heuer, Michael and Kartashov, Andrey and Leehr, Dan and Ménager, Hervé and Nedeljkovich, Maya and Scales, Matt and Soiland-Reyes, Stian and Stojanovic, Luka},
keywords = {Bioinformatics, Computational Biology, 80301 Bioinformatics Software, Computer Software, 80302 Computer System Architecture, 80501 Distributed and Grid Systems, Distributed Computing},
title = {Common Workflow Language, v1.0},
publisher = {Figshare},
year = {2016}
}
@article{Alterovitz2019,
author = {Alterovitz, Gil and Dean, Dennis and Goble, Carole and Crusoe, Michael R. and Soiland-Reyes, Stian and Bell, Amanda and Hayes, Anais and Suresh, Anita and Purkayastha, Anjan and King, Charles H. and Taylor, Dan and Johanson, Elaine and Thompson, Elaine E. and Donaldson, Eric and Morizono, Hiroki and Tsang, Hsinyi and Vora, Jeet K. and Goecks, Jeremy and Yao, Jianchao and Almeida, Jonas S. and Keeney, Jonathon and Addepalli, KanakaDurga and Krampis, Konstantinos and Smith, Krista M. and Guo, Lydia and Walderhaug, Mark and Schito, Marco and Ezewudo, Matthew and Guimera, Nuria and Walsh, Paul and Kahsay, Robel and Gottipati, Srikanth and Rodwell, Timothy C. and Bloom, Toby and Lai, Yuching and Simonyan, Vahan and Mazumder, Raja},
journal = {PLOS Biology},
publisher = {Public Library of Science},
title = {Enabling precision medicine via standard communication of HTS provenance, analysis, and results},
year = {2018},
month = {12},
volume = {16},
url = {https://doi.org/10.1371/journal.pbio.3000099},
pages = {1-14},
abstract = {This Community Page article presents a communication standard for the provenance of high-throughput sequencing data; a BioCompute Object (BCO) can serve as a history of what was computed, be used as part of a validation process, or provide clarity and transparency of an experimental process to collaborators.},
number = {12},
doi = {10.1371/journal.pbio.3000099}
}
@article{Custovic799,
author = {Custovic, Adnan and Ainsworth, John and Arshad, Hasan and Bishop, Christopher and Buchan, Iain and Cullinan, Paul and Devereux, Graham and Henderson, John and Holloway, John and Roberts, Graham and Turner, Steve and Woodcock, Ashley and Simpson, Angela},
title = {The Study Team for Early Life Asthma Research (STELAR) consortium {\textquoteleft}Asthma e-lab{\textquoteright}: team science bringing data, methods and investigators together},
volume = {70},
number = {8},
pages = {799--801},
year = {2015},
doi = {10.1136/thoraxjnl-2015-206781},
publisher = {BMJ Publishing Group Ltd},
abstract = {We created Asthma e-Lab, a secure web-based research environment to support consistent recording, description and sharing of data, computational/statistical methods and emerging findings across the five UK birth cohorts. The e-Lab serves as a data repository for our unified dataset and provides the computational resources and a scientific social network to support collaborative research. All activities are transparent, and emerging findings are shared via the e-Lab, linked to explanations of analytical methods, thus enabling knowledge transfer. eLab facilitates the iterative interdisciplinary dialogue between clinicians, statisticians, computer scientists, mathematicians, geneticists and basic scientists, capturing collective thought behind the interpretations of findings.},
issn = {0040-6376},
eprint = {https://thorax.bmj.com/content/70/8/799.full.pdf},
journal = {Thorax}
}
@inproceedings{Chirigati2016,
doi = {10.1145/2882903.2899401},
year = {2016},
publisher = {{ACM} Press},
author = {Fernando Chirigati and R{\'{e}}mi Rampin and Dennis Shasha and Juliana Freire},
title = {{ReproZip}: Computational Reproducibility With Ease},
booktitle = {Proceedings of the 2016 International Conference on Management of Data - {SIGMOD} {\textquotesingle}16}
}
@article{Bergmann2014,
doi = {10.1186/s12859-014-0369-z},
year = {2014},
month = {dec},
publisher = {Springer Nature},
volume = {15},
number = {1},
pages = {369},
author = {Frank T Bergmann and Richard Adams and Stuart Moodie and Jonathan Cooper and Mihai Glont and Martin Golebiewski and Michael Hucka and Camille Laibe and Andrew K Miller and David P Nickerson and Brett G Olivier and Nicolas Rodriguez and Herbert M Sauro and Martin Scharm and Stian Soiland-Reyes and Dagmar Waltemath and Florent Yvon and Nicolas Le Nov{\`{e}}re},
title = {{COMBINE} archive and {OMEX} format: one file to share all information to reproduce a modeling project},
journal = {{BMC} Bioinformatics}
}
@article{Springate2014,
doi = {10.1371/journal.pone.0099825},
year = {2014},
month = {jun},
publisher = {Public Library of Science ({PLoS})},
volume = {9},
number = {6},
pages = {e99825},
author = {David A. Springate and Evangelos Kontopantelis and Darren M. Ashcroft and Ivan Olier and Rosa Parisi and Edmore Chamapiwa and David Reeves},
editor = {Irene Petersen},
title = {{ClinicalCodes}: An Online Clinical Codes Repository to Improve the Validity and Reproducibility of Research Using Electronic Medical Records},
journal = {{PLoS} {ONE}}
}
@incollection{Moreau2008,
doi = {10.1007/978-3-540-89965-5_31},
year = {2008},
publisher = {Springer Berlin Heidelberg},
pages = {323--326},
author = {Luc Moreau and Juliana Freire and Joe Futrelle and Robert E. McGrath and Jim Myers and Patrick Paulson},
title = {The Open Provenance Model: An Overview},
booktitle = {Lecture Notes in Computer Science}
}
@misc{moreau2009governance,
title = {Governance of the open provenance model},
author = {Moreau, Luc and Freire, Juliana and Futrelle, Joe and Myers, Jim and Paulson, Patrick},
url = {https://nms.kcl.ac.uk/luc.moreau/papers/governance.pdf},
note = {Accessed 18 Sep 2018},
year = {2009},
month = {jun},
day = {15}
}
@article{Moreau2015,
doi = {10.1016/j.websem.2015.04.001},
year = {2015},
month = {dec},
publisher = {Elsevier {BV}},
volume = {35},
pages = {235--257},
author = {Luc Moreau and Paul Groth and James Cheney and Timothy Lebo and Simon Miles},
title = {The rationale of {PROV}},
journal = {Web Semantics: Science, Services and Agents on the World Wide Web}
}
@article{Kurtzer2017,
doi = {10.1371/journal.pone.0177459},
year = {2017},
month = {may},
publisher = {Public Library of Science ({PLoS})},
volume = {12},
number = {5},
pages = {e0177459},
author = {Gregory M. Kurtzer and Vanessa Sochat and Michael W. Bauer},
editor = {Attila Gursoy},
title = {Singularity: Scientific containers for mobility of compute},
journal = {{PLOS} {ONE}}
}
@inproceedings{Tolk,
doi = {10.1109/pads.2006.39},
year = {2006},
publisher = {{IEEE}},
author = {A. Tolk},
title = {What Comes After the Semantic Web - {PADS} Implications for the Dynamic Web},
booktitle = {20th Workshop on Principles of Advanced and Distributed Simulation ({PADS}{\textquotesingle}06)}
}
@article{Grning2018,
doi = {10.1038/s41592-018-0046-7},
year = {2018},
month = {jul},
publisher = {Springer Nature},
volume = {15},
number = {7},
pages = {475--476},
author = {Bj\"{o}rn Gr\"{u}ning and and Ryan Dale and Andreas Sj\"{o}din and Brad A. Chapman and Jillian Rowe and Christopher H. Tomkins-Tinch and Renan Valieris and Johannes K\"{o}ster},
title = {Bioconda: sustainable and comprehensive software distribution for the life sciences},
journal = {Nature Methods}
}
@article{Ivie2018,
author = {Ivie, Peter and Thain, Douglas},
title = {Reproducibility in Scientific Computing},
journal = {ACM Comput. Surv.},
issue_date = {July 2018},
volume = {51},
number = {3},
month = jul,
year = {2018},
issn = {0360-0300},
pages = {63:1--63:36},
articleno = {63},
numpages = {36},
doi = {10.1145/3186266},
acmid = {3186266},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {Reproducibility, computational science, replicability, reproducible, scientific computing, scientific workflow, scientific workflows, workflow, workflows},
}
@article{sandve_2013,
title = {Ten simple rules for reproducible computational research.},
author = {Sandve, Geir Kjetil and Nekrutenko, Anton and Taylor, James and Hovig, Eivind},
pages = {e1003285},
year = {2013},
month = {oct},
day = {24},
urldate = {2018-07-13},
journal = {{PLoS} Comput Biol},
volume = {9},
number = {10},
doi = {10.1371/journal.pcbi.1003285},
pmid = {24204232},
pmcid = {PMC3812051},
f1000-projects = {CWLProv}
}
@article{Spjuth2015,
doi = {10.1186/s13062-015-0071-8},
year = {2015},
month = {aug},
publisher = {Springer Nature},
volume = {10},
number = {1},
author = {Ola Spjuth and Erik Bongcam-Rudloff and Guillermo Carrasco Hern{\'{a}}ndez and Lukas Forer and Mario Giovacchini and Roman Valls Guimera and Aleksi Kallio and Eija Korpelainen and Maciej M Ka{\'{n}}du{\l}a and Milko Krachunov and David P Kreil and Ognyan Kulev and Pawe{\l} P. {\L}abaj and Samuel Lampa and Luca Pireddu and Sebastian Sch\"{o}nherr and Alexey Siretskiy and Dimitar Vassilev},
title = {Experiences with workflows for automating data-intensive bioinformatics},
journal = {Biology Direct}
}
@article{Nekrutenko2012,
doi = {10.1038/nrg3305},
year = {2012},
month = {sep},
publisher = {Springer Nature},
volume = {13},
number = {9},
pages = {667--672},
author = {Anton Nekrutenko and James Taylor},
title = {Next-generation sequencing data interpretation: enhancing reproducibility and accessibility},
journal = {Nature Reviews Genetics}
}
@article{Alper2018,
doi = {10.3390/informatics5010011},
year = {2018},
month = {feb},
publisher = {{MDPI} {AG}},
volume = {5},
number = {1},
pages = {11},
author = {Pinar Alper and Khalid Belhajjame and Vasa Curcin and Carole Goble},
title = {{LabelFlow} Framework for Annotating Workflow Provenance},
journal = {Informatics}
}
@article{Ison2013,
doi = {10.1093/bioinformatics/btt113},
year = {2013},
month = {mar},
publisher = {Oxford University Press ({OUP})},
volume = {29},
number = {10},
pages = {1325--1332},
author = {J. Ison and M. Kalas and I. Jonassen and D. Bolser and M. Uludag and H. McWilliam and J. Malone and R. Lopez and S. Pettifer and P. Rice},
title = {{EDAM}: an ontology of bioinformatics operations, types of data and identifiers, topics and formats},
journal = {Bioinformatics}
}
@article{ludascher2016brief,
title = {A Brief Tour Through Provenance in Scientific Workflows and Databases},
author = {Ludäscher, Bertram},
pages = {103--126},
year = {2016},
journal = {Springer Proceedings in Business and Economics},
publisher = {Springer International Publishing},
isbn = {9783319402260},
issn = {2198-7254},
doi = {10.1007/978-3-319-40226-0_7}
}
@article{Littauer2012,
doi = {10.2218/ijdc.v7i2.232},
year = {2012},
month = {oct},
publisher = {Edinburgh University Library},
volume = {7},
number = {2},
pages = {92--100},
author = {Richard Littauer and Karthik Ram and Bertram Lud\"{a}scher and William Michener and Rebecca Koskela},
title = {Trends in Use of Scientific Workflows: Insights from a Public Repository and Recommendations for Best Practice},
journal = {International Journal of Digital Curation}
}
@article{Gymrek2016,
doi = {10.1186/s13742-016-0127-4},
year = {2016},
month = {may},
publisher = {Oxford University Press ({OUP})},
volume = {5},
number = {1},
author = {Melissa Gymrek and Yossi Farjoun},
title = {Recommendations for open data science},
journal = {{GigaScience}}
}
@article{Stodden2014,
doi = {10.5334/jors.ay},
year = {2014},
month = {jul},
publisher = {Ubiquity Press, Ltd.},
volume = {2},
number = {1},
author = {Victoria Stodden and Sheila Miguez},
title = {Best Practices for Computational Science: Software Infrastructure and Environments for Reproducible and Extensible Research},
journal = {Journal of Open Research Software}
}
@inproceedings{Zhao2012,
doi = {10.1109/escience.2012.6404482},
year = {2012},
month = {oct},
publisher = {{IEEE}},
author = {Jun Zhao and Jose Manuel Gomez-Perez and Khalid Belhajjame and Graham Klyne and Esteban Garcia-Cuesta and Aleix Garrido and Kristina Hettne and Marco Roos and David De Roure and Carole Goble},
title = {Why workflows break {\textendash} Understanding and combating decay in Taverna workflows},
booktitle = {2012 {IEEE} 8th International Conference on E-Science}
}
@article{garijo_2013,
title = {Quantifying reproducibility in computational biology: the case of the tuberculosis drugome.},
author = {Garijo, Daniel and Kinnings, Sarah and Xie, Li and Xie, Lei and Zhang, Yinliang and Bourne, Philip E and Gil, Yolanda},
pages = {e80278},
year = {2013},
month = {nov},
day = {27},
urldate = {2016-04-13},
journal = {{PLoS} {ONE}},
volume = {8},
number = {11},
doi = {10.1371/journal.pone.0080278},
pmid = {24312207},
pmcid = {PMC3842296}
}
@article{stephens_2015,
title = {Big data: astronomical or genomical?},
author = {Stephens, Zachary D and Lee, Skylar Y and Faghri, Faraz and Campbell, Roy H and Zhai, Chengxiang and Efron, Miles J and Iyer, Ravishankar and Schatz, Michael C and Sinha, Saurabh and Robinson, Gene E},
pages = {e1002195},
year = {2015},
month = {jul},
day = {7},
urldate = {2018-07-23},
journal = {{PLoS} Biol},
volume = {13},
number = {7},
issn = {1545-7885},
doi = {10.1371/journal.pbio.1002195},
pmid = {26151137},
pmcid = {PMC4494865},
f1000-projects = {CWLProv},
abstract = {Genomics is a Big Data science and is going to get much bigger, very soon, but it is not known whether the needs of genomics will exceed other Big Data domains. Projecting to the year 2025, we compared genomics with three other major generators of Big Data: astronomy, {YouTube}, and Twitter. Our estimates show that genomics is a "four-headed beast"--it is either on par with or the most demanding of the domains analyzed here in terms of data acquisition, storage, distribution, and analysis. We discuss aspects of new technologies that will need to be developed to rise up and meet the computational challenges that genomics poses for the near future. Now is the time for concerted, community-wide planning for the "genomical" challenges of the next decade.}
}
@article{gonzlezbeltrn_2015,
title = {From Peer-Reviewed to Peer-Reproduced in Scholarly Publishing: The Complementary Roles of Data Models and Workflows in Bioinformatics.},
author = {González-Beltrán, Alejandra and Li, Peter and Zhao, Jun and Avila-Garcia, Maria Susana and Roos, Marco and Thompson, Mark and van der Horst, Eelke and Kaliyaperumal, Rajaram and Luo, Ruibang and Lee, Tin-Lap and Lam, Tak-Wah and Edmunds, Scott C and Sansone, Susanna-Assunta and Rocca-Serra, Philippe},
pages = {e0127612},
url = {http://dx.plos.org/10.1371/journal.pone.0127612},
year = {2015},
month = {jul},
day = {8},
urldate = {2018-07-23},
journal = {{PLoS} {ONE}},
volume = {10},
number = {7},
issn = {1932-6203},
doi = {10.1371/journal.pone.0127612},
pmid = {26154165},
pmcid = {PMC4495984},
f1000-projects = {CWLProv},
abstract = {{MOTIVATION}: Reproducing the results from a scientific paper can be challenging due to the absence of data and the computational tools required for their analysis. In addition, details relating to the procedures used to obtain the published results can be difficult to discern due to the use of natural language when reporting how experiments have been performed. The Investigation/Study/Assay ({ISA}), Nanopublications ({NP}), and Research Objects ({RO}) models are conceptual data modelling frameworks that can structure such information from scientific papers. Computational workflow platforms can also be used to reproduce analyses of data in a principled manner. We assessed the extent by which {ISA}, {NP}, and {RO} models, together with the Galaxy workflow system, can capture the experimental processes and reproduce the findings of a previously published paper reporting on the development of {SOAPdenovo2}, a de novo genome assembler. {RESULTS}: Executable workflows were developed using Galaxy, which reproduced results that were consistent with the published findings. A structured representation of the information in the {SOAPdenovo2} paper was produced by combining the use of {ISA}, {NP}, and {RO} models. By structuring the information in the published paper using these data and scientific workflow modelling frameworks, it was possible to explicitly declare elements of experimental design, variables, and findings. The models served as guides in the curation of scientific information and this led to the identification of inconsistencies in the original published paper, thereby allowing its authors to publish corrections in the form of an errata. {AVAILABILITY}: {SOAPdenovo2} scripts, data, and results are available through the {GigaScience} Database: http://dx.doi.org/10.5524/100044; the workflows are available from {GigaGalaxy}: http://galaxy.cbiit.cuhk.edu.hk; and the representations using the {ISA}, {NP}, and {RO} models are available through the {SOAPdenovo2} case study website http://isa-tools.github.io/soapdenovo2/. {CONTACT}: philippe.rocca-serra@oerc.ox.ac.uk and susanna-assunta.sansone@oerc.ox.ac.uk.}
}
@article{ciccarese_2013,
title = {{PAV} ontology: provenance, authoring and versioning.},
author = {Ciccarese, Paolo and Soiland-Reyes, Stian and Belhajjame, Khalid and Gray, Alasdair Jg and Goble, Carole and Clark, Tim},
pages = {37},
url = {http://dx.doi.org/10.1186/2041-1480-4-37},
year = {2013},
month = {nov},
day = {22},
urldate = {2017-08-16},
journal = {J Biomed Semantics},
volume = {4},
number = {1},
doi = {10.1186/2041-1480-4-37},
pmid = {24267948},
pmcid = {PMC4177195},
f1000-projects = {{CWLProv} and Your publications},
abstract = {{BACKGROUND}: Provenance is a critical ingredient for establishing trust of published scientific content. This is true whether we are considering a data set, a computational workflow, a peer-reviewed publication or a simple scientific claim with supportive evidence. Existing vocabularies such as Dublin Core Terms ({DC} Terms) and the {W3C} Provenance Ontology ({PROV}-O) are domain-independent and general-purpose and they allow and encourage for extensions to cover more specific needs. In particular, to track authoring and versioning information of web resources, {PROV}-O provides a basic methodology but not any specific classes and properties for identifying or distinguishing between the various roles assumed by agents manipulating digital artifacts, such as author, contributor and curator. {RESULTS}: We present the Provenance, Authoring and Versioning ontology ({PAV}, namespace http://purl.org/pav/): a lightweight ontology for capturing "just enough" descriptions essential for tracking the provenance, authoring and versioning of web resources. We argue that such descriptions are essential for digital scientific content. {PAV} distinguishes between contributors, authors and curators of content and creators of representations in addition to the provenance of originating resources that have been accessed, transformed and consumed. We explore five projects (and communities) that have adopted {PAV} illustrating their usage through concrete examples. Moreover, we present mappings that show how {PAV} extends the {W3C} {PROV}-O ontology to support broader interoperability. {METHOD}: The initial design of the {PAV} ontology was driven by requirements from the {AlzSWAN} project with further requirements incorporated later from other projects detailed in this paper. The authors strived to keep {PAV} lightweight and compact by including only those terms that have demonstrated to be pragmatically useful in existing applications, and by recommending terms from existing ontologies when plausible. {DISCUSSION}: We analyze and compare {PAV} with related approaches, namely Provenance Vocabulary ({PRV}), {DC} Terms and {BIBFRAME}. We identify similarities and analyze differences between those vocabularies and {PAV}, outlining strengths and weaknesses of our proposed model. We specify {SKOS} mappings that align {PAV} with {DC} Terms. We conclude the paper with general remarks on the applicability of {PAV}.}
}
@article{wolstencroft_2013,
title = {The Taverna workflow suite: designing and executing workflows of Web Services on the desktop, web or in the cloud.},
author = {Wolstencroft, Katherine and Haines, Robert and Fellows, Donal and Williams, Alan and Withers, David and Owen, Stuart and Soiland-Reyes, Stian and Dunlop, Ian and Nenadic, Aleksandra and Fisher, Paul and Bhagat, Jiten and Belhajjame, Khalid and Bacall, Finn and Hardisty, Alex and Nieva de la Hidalga, Abraham and Balcazar Vargas, Maria P and Sufi, Shoaib and Goble, Carole},
pages = {W557-61},
year = {2013},
month = {jul},
journal = {Nucleic Acids Res},
volume = {41},
number = {Web Server issue},
doi = {10.1093/nar/gkt328},
pmid = {23640334},
pmcid = {PMC3692062},
f1000-projects = {{CWL} and {CWLProv} and Debianpaper and Your publications},
abstract = {The Taverna workflow tool suite (http://www.taverna.org.uk) is designed to combine distributed Web Services and/or local tools into complex analysis pipelines. These pipelines can be executed on local desktop machines or through larger infrastructure (such as supercomputers, Grids or cloud environments), using the Taverna Server. In bioinformatics, Taverna workflows are typically used in the areas of high-throughput omics analyses (for example, proteomics or transcriptomics), or for evidence gathering methods involving text mining or data mining. Through Taverna, scientists have access to several thousand different tools and resources that are freely available from a large range of life science institutions. Once constructed, the workflows are reusable, executable bioinformatics protocols that can be shared, reused and repurposed. A repository of public workflows is available at http://www.myexperiment.org. This article provides an update to the Taverna tool suite, highlighting new features and developments in the workbench and the Taverna Server.}
}
@article{peng_2011,
title = {Reproducible research in computational science.},
author = {Peng, Roger D},
pages = {1226-1227},
year = {2011},
month = {dec},
day = {2},
journal = {Science},
volume = {334},
number = {6060},
doi = {10.1126/science.1213847},
pmid = {22144613},
pmcid = {PMC3383002},
f1000-projects = {CWLProv},
abstract = {Computational science has led to exciting new developments, but the nature of the work has exposed limitations in our ability to evaluate published findings. Reproducibility has the potential to serve as a minimum standard for judging scientific claims when full independent replication of a study is not possible.}
}
@article{ison_2013,
title = {{EDAM}: an ontology of bioinformatics operations, types of data and identifiers, topics and formats.},
author = {Ison, Jon and Kalas, Matús and Jonassen, Inge and Bolser, Dan and Uludag, Mahmut and {McWilliam}, Hamish and Malone, James and Lopez, Rodrigo and Pettifer, Steve and Rice, Peter},
pages = {1325-1332},
year = {2013},
month = {may},
day = {15},
journal = {Bioinformatics},
volume = {29},
number = {10},
doi = {10.1093/bioinformatics/btt113},
pmid = {23479348},
pmcid = {PMC3654706},
f1000-projects = {{CWLProv} and Debianpaper},
abstract = {{MOTIVATION}: Advancing the search, publication and integration of bioinformatics tools and resources demands consistent machine-understandable descriptions. A comprehensive ontology allowing such descriptions is therefore required. {RESULTS}: {EDAM} is an ontology of bioinformatics operations (tool or workflow functions), types of data and identifiers, application domains and data formats. {EDAM} supports semantic annotation of diverse entities such as Web services, databases, programmatic libraries, standalone tools, interactive applications, data schemas, datasets and publications within bioinformatics. {EDAM} applies to organizing and finding suitable tools and data and to automating their integration into complex applications or workflows. It includes over 2200 defined concepts and has successfully been used for annotations and implementations. {AVAILABILITY}: The latest stable version of {EDAM} is available in {OWL} format from http://edamontology.org/{EDAM}.owl and in {OBO} format from http://edamontology.org/{EDAM}.obo. It can be viewed online at the {NCBO} {BioPortal} and the {EBI} Ontology Lookup Service. For documentation and license please refer to http://edamontology.org. This article describes version 1.2 available at http://edamontology.org/{EDAM\_1}.2.owl. {CONTACT}: jison@ebi.ac.uk.}
}
@article{muse_1994,
title = {A likelihood approach for comparing synonymous and nonsynonymous nucleotide substitution rates, with application to the chloroplast genome.},
author = {Muse, S V and Gaut, B S},
pages = {715-724},
year = {1994},
month = {sep},
journal = {Mol Biol Evol},
volume = {11},
number = {5},
doi = {10.1093/oxfordjournals.molbev.a040152},
pmid = {7968485},
f1000-projects = {CWLProv},
abstract = {A model of {DNA} sequence evolution applicable to coding regions is presented. This represents the first evolutionary model that accounts for dependencies among nucleotides within a codon. The model uses the codon, as opposed to the nucleotide, as the unit of evolution, and is parameterized in terms of synonymous and nonsynonymous nucleotide substitution rates. One of the model's advantages over those used in methods for estimating synonymous and nonsynonymous substitution rates is that it completely corrects for multiple hits at a codon, rather than taking a parsimony approach and considering only pathways of minimum change between homologous codons. Likelihood-ratio versions of the relative-rate test are constructed and applied to data from the complete chloroplast {DNA} sequences of Oryza sativa, Nicotiana tabacum, and Marchantia polymorpha. Results of these tests confirm previous findings that substitution rates in the chloroplast genome are subject to both lineage-specific and locus-specific effects. Additionally, the new tests suggest that the rate heterogeneity is due primarily to differences in nonsynonymous substitution rates. Simulations help confirm previous suggestions that silent sites are saturated, leaving no evidence of heterogeneity in synonymous substitution rates.}
}
@article{wilkinson_2016,
title = {The {FAIR} Guiding Principles for scientific data management and stewardship.},
author = {Wilkinson, Mark D and Dumontier, Michel and Aalbersberg, IJsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and Blomberg, Niklas and Boiten, Jan-Willem and da Silva Santos, Luiz Bonino and Bourne, Philip E and Bouwman, Jildau and Brookes, Anthony J and Clark, Tim and Crosas, Mercè and Dillo, Ingrid and Dumon, Olivier and Edmunds, Scott and Evelo, Chris T and Finkers, Richard and Gonzalez-Beltran, Alejandra and Gray, Alasdair J G and Groth, Paul and Goble, Carole and Grethe, Jeffrey S and Heringa, Jaap and 't Hoen, Peter A C and Hooft, Rob and Kuhn, Tobias and Kok, Ruben and Kok, Joost and Lusher, Scott J and Martone, Maryann E and Mons, Albert and Packer, Abel L and Persson, Bengt and Rocca-Serra, Philippe and Roos, Marco and van Schaik, Rene and Sansone, Susanna-Assunta and Schultes, Erik and Sengstag, Thierry and Slater, Ted and Strawn, George and Swertz, Morris A and Thompson, Mark and van der Lei, Johan and van Mulligen, Erik and Velterop, Jan and Waagmeester, Andra and Wittenburg, Peter and Wolstencroft, Katherine and Zhao, Jun and Mons, Barend},
pages = {160018},
year = {2016},
month = {mar},
day = {15},
urldate = {2018-07-13},
journal = {Sci Data},
volume = {3},
issn = {2052-4463},
doi = {10.1038/sdata.2016.18},
pmid = {26978244},
pmcid = {PMC4792175},
f1000-projects = {CWLProv},
abstract = {There is an urgent need to improve the infrastructure supporting the reuse of scholarly data. A diverse set of stakeholders-representing academia, industry, funding agencies, and scholarly publishers-have come together to design and jointly endorse a concise and measureable set of principles that we refer to as the {FAIR} Data Principles. The intent is that these may act as a guideline for those wishing to enhance the reusability of their data holdings. Distinct from peer initiatives that focus on the human scholar, the {FAIR} Principles put specific emphasis on enhancing the ability of machines to automatically find and use the data, in addition to supporting its reuse by individuals. This Comment is the first formal publication of the {FAIR} Principles, and includes the rationale behind them, and some exemplar implementations in the community.}
}
@article{freire_2012,
title = {Making Computations and Publications Reproducible with {VisTrails}},
author = {Freire, Juliana and Silva, Claudio T.},
pages = {18-25},
year = {2012},
month = {jul},
journal = {Comput Sci Eng},
volume = {14},
number = {4},
issn = {1521-9615},
doi = {10.1109/MCSE.2012.76},
f1000-projects = {CWLProv}
}
@article{afgan_2016,
title = {The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2016 update.},
author = {Afgan, Enis and Baker, Dannon and van den Beek, Marius and Blankenberg, Daniel and Bouvier, Dave and Čech, Martin and Chilton, John and Clements, Dave and Coraor, Nate and Eberhard, Carl and Grüning, Björn and Guerler, Aysam and Hillman-Jackson, Jennifer and Von Kuster, Greg and Rasche, Eric and Soranzo, Nicola and Turaga, Nitesh and Taylor, James and Nekrutenko, Anton and Goecks, Jeremy},
pages = {W3-W10},
year = {2016},
month = {jul},
day = {8},
journal = {Nucleic Acids Res},
volume = {44},
number = {W1},
doi = {10.1093/nar/gkw343},
pmid = {27137889},
pmcid = {PMC4987906},
f1000-projects = {{CWL} and {CWLProv} and Debianpaper},
abstract = {High-throughput data production technologies, particularly 'next-generation' {DNA} sequencing, have ushered in widespread and disruptive changes to biomedical research. Making sense of the large datasets produced by these technologies requires sophisticated statistical and computational methods, as well as substantial computational power. This has led to an acute crisis in life sciences, as researchers without informatics training attempt to perform computation-dependent analyses. Since 2005, the Galaxy project has worked to address this problem by providing a framework that makes advanced computational tools usable by non experts. Galaxy seeks to make data-intensive research more accessible, transparent and reproducible by providing a Web-based environment in which users can perform computational analyses and have all of the details automatically tracked for later inspection, publication, or reuse. In this report we highlight recently added features enabling biomedical analyses on a large scale. \copyright The Author(s) 2016. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{leipzig_2017,
title = {A review of bioinformatic pipeline frameworks.},
author = {Leipzig, Jeremy},
pages = {530-536},
year = {2017},
month = {may},
day = {1},
journal = {Brief Bioinformatics},
volume = {18},
number = {3},
doi = {10.1093/bib/bbw020},
pmid = {27013646},
pmcid = {PMC5429012},
f1000-projects = {{CWL} and {CWLProv} and Debianpaper},
abstract = {High-throughput bioinformatic analyses increasingly rely on pipeline frameworks to process sequence and metadata. Modern implementations of these frameworks differ on three key dimensions: using an implicit or explicit syntax, using a configuration, convention or class-based design paradigm and offering a command line or workbench interface. Here I survey and compare the design philosophies of several current pipeline frameworks. I provide practical recommendations based on analysis requirements and the user base. \copyright The Author 2016. Published by Oxford University Press.}
}
@article{smith_2016,
title = {Software citation principles},
author = {Smith, Arfon M. and Katz, Daniel S. and Niemeyer, Kyle E. and {FORCE11} Software Citation Working Group},
pages = {e86},
url = {https://peerj.com/articles/cs-86},
year = {2016},
month = {sep},
day = {19},
urldate = {2017-09-26},
journal = {{PeerJ} Computer Science},
volume = {2},
issn = {2376-5992},
doi = {10.7717/peerj-cs.86},
f1000-projects = {{CWLProv} and Reproducibility}
}
@article{bergmann_2014,
title = {{COMBINE} archive and {OMEX} format: one file to share all information to reproduce a modeling project.},
author = {Bergmann, Frank T and Adams, Richard and Moodie, Stuart and Cooper, Jonathan and Glont, Mihai and Golebiewski, Martin and Hucka, Michael and Laibe, Camille and Miller, Andrew K and Nickerson, David P and Olivier, Brett G and Rodriguez, Nicolas and Sauro, Herbert M and Scharm, Martin and Soiland-Reyes, Stian and Waltemath, Dagmar and Yvon, Florent and Le Novère, Nicolas},
pages = {369},
url = {http://dx.doi.org/10.1186/s12859-014-0369-z},
year = {2014},
month = {dec},
day = {14},
urldate = {2017-08-16},
journal = {{BMC} Bioinformatics},
volume = {15},
number = {1},
doi = {10.1186/s12859-014-0369-z},
pmid = {25494900},
pmcid = {PMC4272562},
f1000-projects = {{CWLProv} and Your publications},
abstract = {{BACKGROUND}: With the ever increasing use of computational models in the biosciences, the need to share models and reproduce the results of published studies efficiently and easily is becoming more important. To this end, various standards have been proposed that can be used to describe models, simulations, data or other essential information in a consistent fashion. These constitute various separate components required to reproduce a given published scientific result. {RESULTS}: We describe the Open Modeling {EXchange} format ({OMEX}). Together with the use of other standard formats from the Computational Modeling in Biology Network ({COMBINE}), {OMEX} is the basis of the {COMBINE} Archive, a single file that supports the exchange of all the information necessary for a modeling and simulation experiment in biology. An {OMEX} file is a {ZIP} container that includes a manifest file, listing the content of the archive, an optional metadata file adding information about the archive and its content, and the files describing the model. The content of a {COMBINE} Archive consists of files encoded in {COMBINE} standards whenever possible, but may include additional files defined by an Internet Media Type. Several tools that support the {COMBINE} Archive are available, either as independent libraries or embedded in modeling software. {CONCLUSIONS}: The {COMBINE} Archive facilitates the reproduction of modeling and simulation experiments in biology by embedding all the relevant information in one file. Having all the information stored and exchanged at once also helps in building activity logs and audit trails. We anticipate that the {COMBINE} Archive will become a significant help for modellers, as the domain moves to larger, more complex experiments such as multi-scale models of organs, digital organisms, and bioengineering.}
}
@inproceedings{garijo_2011,
title = {A new approach for publishing workflows: Abstractions, standards, and linked data},
author = {Garijo, Daniel and Gil, Yolanda},
pages = {47},
publisher = {{ACM} Press},
url = {http://dl.acm.org/citation.cfm?doid=2110497.2110504},
year = {2011},
month = {nov},
day = {14},
urldate = {2018-07-13},
isbn = {9781450311007},
doi = {10.1145/2110497.2110504},
address = {New York, New York, {USA}},
f1000-projects = {CWLProv},
booktitle = {Proceedings of the 6th workshop on Workflows in support of large-scale science - {WORKS} '11}
}
@article{kaushik_2017,
title = {Rabix: an open-source workflow executor supporting recomputability and interoperability of workflow descriptions.},
author = {Kaushik, Gaurav and Ivkovic, Sinisa and Simonovic, Janko and Tijanic, Nebojsa and Davis-Dusenbery, Brandi and Kural, Deniz},
pages = {154-165},
year = {2017},
urldate = {2017-11-03},
journal = {Pac Symp Biocomput},
volume = {22},
doi = {10.1142/9789813207813_0016},
pmid = {27896971},
pmcid = {PMC5166558},
f1000-projects = {{CWL} and {CWLProv}},
abstract = {As biomedical data has become increasingly easy to generate in large quantities, the methods used to analyze it have proliferated rapidly. Reproducible and reusable methods are required to learn from large volumes of data reliably. To address this issue, numerous groups have developed workflow specifications or execution engines, which provide a framework with which to perform a sequence of analyses. One such specification is the Common Workflow Language, an emerging standard which provides a robust and flexible framework for describing data analysis tools and workflows. In addition, reproducibility can be furthered by executors or workflow engines which interpret the specification and enable additional features, such as error logging, file organization, optimizations to computation and job scheduling, and allow for easy computing on large volumes of data. To this end, we have developed the Rabix Executor, an open-source workflow engine for the purposes of improving reproducibility through reusability and interoperability of workflow descriptions.}
}
@article{stodden_2016,
title = {Enhancing reproducibility for computational methods.},
author = {Stodden, Victoria and {McNutt}, Marcia and Bailey, David H and Deelman, Ewa and Gil, Yolanda and Hanson, Brooks and Heroux, Michael A and Ioannidis, John P A and Taufer, Michela},
pages = {1240-1241},
year = {2016},
month = {dec},
day = {9},
urldate = {2018-07-23},
journal = {Science},
volume = {354},
number = {6317},
issn = {0036-8075},
doi = {10.1126/science.aah6168},
pmid = {27940837},
f1000-projects = {CWLProv}
}
@article{oconnor_2017,
title = {The Dockstore: enabling modular, community-focused sharing of Docker-based genomics tools and workflows.},
author = {O'Connor, Brian D and Yuen, Denis and Chung, Vincent and Duncan, Andrew G and Liu, Xiang Kun and Patricia, Janice and Paten, Benedict and Stein, Lincoln and Ferretti, Vincent},
pages = {52},
year = {2017},
month = {jan},
day = {18},
urldate = {2018-07-13},
journal = {F1000Res},
volume = {6},
doi = {10.12688/f1000research.10137.1},
pmid = {28344774},
pmcid = {PMC5333608},
f1000-projects = {{CWLProv} and Debianpaper},
abstract = {As genomic datasets continue to grow, the feasibility of downloading data to a local organization and running analysis on a traditional compute environment is becoming increasingly problematic. Current large-scale projects, such as the {ICGC} {PanCancer} Analysis of Whole Genomes ({PCAWG}), the Data Platform for the U.S. Precision Medicine Initiative, and the {NIH} Big Data to Knowledge Center for Translational Genomics, are using cloud-based infrastructure to both host and perform analysis across large data sets. In {PCAWG}, over 5,800 whole human genomes were aligned and variant called across 14 cloud and {HPC} environments; the processed data was then made available on the cloud for further analysis and sharing. If run locally, an operation at this scale would have monopolized a typical academic data centre for many months, and would have presented major challenges for data storage and distribution. However, this scale is increasingly typical for genomics projects and necessitates a rethink of how analytical tools are packaged and moved to the data. For {PCAWG}, we embraced the use of highly portable Docker images for encapsulating and sharing complex alignment and variant calling workflows across highly variable environments. While successful, this endeavor revealed a limitation in Docker containers, namely the lack of a standardized way to describe and execute the tools encapsulated inside the container. As a result, we created the Dockstore ( https://dockstore.org), a project that brings together Docker images with standardized, machine-readable ways of describing and running the tools contained within. This service greatly improves the sharing and reuse of genomics tools and promotes interoperability with similar projects through emerging web service standards developed by the Global Alliance for Genomics and Health ({GA4GH}).}
}
@article{ditommaso_2017,
title = {Nextflow enables reproducible computational workflows.},
author = {Di Tommaso, Paolo and Chatzou, Maria and Floden, Evan W and Barja, Pablo Prieto and Palumbo, Emilio and Notredame, Cedric},
pages = {316-319},
year = {2017},
month = {apr},
day = {11},
urldate = {2018-07-13},
journal = {Nat Biotechnol},
volume = {35},
number = {4},
doi = {10.1038/nbt.3820},
pmid = {28398311},
f1000-projects = {{CWL} and {CWLProv} and Debianpaper}
}
@online{nextflow_tracing,
title = {Nextflow: Tracing and visualization},
author = {{Centre for Genomic Regulation (CRG)}},
year = 2018,
url = {https://www.nextflow.io/docs/latest/tracing.html#trace-report},
urldate = {2018-11-28},
note = {Accessed 28 Nov 2018}
}
@misc{NogalesPoster2018,
author = "Garriga Nogales, Edgar and Di Tommaso, Paolo and Notredame, Cedric",
title = "{{Nextflow integration for the Research Object Specification}}",
publisher = "Figshare",
year = "2018",
month = "10",
note = { Poster at Workshop on Research Objects (RO2018)},
doi = "10.5281/zenodo.1472384"
}
@misc{nogales_abstract_2018,
title = {{Nextflow integration for the Research Object Specification}},
year = {2018},
publisher = {Zenodo},
month = {oct},
note = {{Poster. At Workshop on Research Objects (RO 2018), 29 Oct 2018, Amsterdam, Netherlands.}},
doi = {10.5281/zenodo.1472385}
}
@article{guimera_2012,
title = {bcbio-nextgen: Automated, distributed next-gen sequencing pipeline},
author = {Guimera, Roman Valls},
pages = {30},
year = {2012},
month = {feb},
day = {28},
urldate = {2017-09-07},
journal = {{EMBnet} j.},
volume = {17},
number = {B},
issn = {2226-6089},
doi = {10.14806/ej.17.B.286},
f1000-projects = {{CWL} and {CWLProv} and Debianpaper}
}
@article{hettne_2014,
title = {Structuring research methods and data with the research object model: genomics workflows as a case study.},
author = {Hettne, Kristina M and Dharuri, Harish and Zhao, Jun and Wolstencroft, Katherine and Belhajjame, Khalid and Soiland-Reyes, Stian and Mina, Eleni and Thompson, Mark and Cruickshank, Don and Verdes-Montenegro, Lourdes and Garrido, Julian and de Roure, David and Corcho, Oscar and Klyne, Graham and van Schouwen, Reinout and 't Hoen, Peter A C and Bechhofer, Sean and Goble, Carole and Roos, Marco},
pages = {41},
year = {2014},
month = {sep},
day = {18},
urldate = {2018-07-13},
journal = {J Biomed Semantics},
volume = {5},
number = {1},
doi = {10.1186/2041-1480-5-41},
pmid = {25276335},
pmcid = {PMC4177597},
f1000-projects = {{CWLProv} and Your publications},
abstract = {{BACKGROUND}: One of the main challenges for biomedical research lies in the computer-assisted integrative study of large and increasingly complex combinations of data in order to understand molecular mechanisms. The preservation of the materials and methods of such computational experiments with clear annotations is essential for understanding an experiment, and this is increasingly recognized in the bioinformatics community. Our assumption is that offering means of digital, structured aggregation and annotation of the objects of an experiment will provide necessary meta-data for a scientist to understand and recreate the results of an experiment. To support this we explored a model for the semantic description of a workflow-centric Research Object ({RO}), where an {RO} is defined as a resource that aggregates other resources, e.g., datasets, software, spreadsheets, text, etc. We applied this model to a case study where we analysed human metabolite variation by workflows. {RESULTS}: We present the application of the workflow-centric {RO} model for our bioinformatics case study. Three workflows were produced following recently defined Best Practices for workflow design. By modelling the experiment as an {RO}, we were able to automatically query the experiment and answer questions such as "which particular data was input to a particular workflow to test a particular hypothesis?", and "which particular conclusions were drawn from a particular workflow?". {CONCLUSIONS}: Applying a workflow-centric {RO} model to aggregate and annotate the resources used in a bioinformatics experiment, allowed us to retrieve the conclusions of the experiment in the context of the driving hypothesis, the executed workflows and their input data. The {RO} model is an extendable reference model that can be used by other systems as well. {AVAILABILITY}: The Research Object is available at http://www.myexperiment.org/packs/428 The {Wf4Ever} Research Object Model is available at https://w3id.org/ro/2016-01-28/.}
}
@article{kurtzer_2017,
title = {Singularity: Scientific containers for mobility of compute.},
author = {Kurtzer, Gregory M and Sochat, Vanessa and Bauer, Michael W},
pages = {e0177459},
year = {2017},
month = {may},
day = {11},
urldate = {2017-11-03},
journal = {{PLoS} {ONE}},
volume = {12},
number = {5},
doi = {10.1371/journal.pone.0177459},
pmid = {28494014},
pmcid = {PMC5426675},
f1000-projects = {{CWLProv} and Debianpaper},
abstract = {Here we present Singularity, software developed to bring containers and reproducibility to scientific computing. Using Singularity containers, developers can work in reproducible environments of their choosing and design, and these complete environments can easily be copied and executed on other platforms. Singularity is an open source initiative that harnesses the expertise of system and software engineers and researchers alike, and integrates seamlessly into common workflows for both of these groups. As its primary use case, Singularity brings mobility of computing to both users and {HPC} centers, providing a secure means to capture and distribute software and compute environments. This ability to create and deploy reproducible environments across these centers, a previously unmet need, makes Singularity a game changing development for computational science.}
}
@inproceedings{robinson_2017,
title = {Common Workflow Language Viewer},
author = {Robinson, Mark and Soiland-Reyes, Stian and Crusoe, Michael R. and Goble, Carole},
url = {https://view.commonwl.org/},
year = {2017},
month = {jul},
day = {22},
urldate = {2017-08-16},
f1000-projects = {{CWLProv} and Your publications},
abstract = {The Common Workflow Language ({CWL}) project emerged from the {BOSC} 2014 Codefest as a grassroots, multi-vendor working group to tackle the portability of data analysis workflows. Its specification for describing workflows and command line tools aims to make them portable and scalable across a variety of computing platforms. At its heart {CWL} is a set of structured text files ({YAML}) with various extensibility points to the format. However, the {CWL} syntax and multi-file collections are not conducive to workflow browsing, exchange and understanding: for this we need a visualization suite. {CWL} Viewer is a richly featured {CWL} visualization suite that graphically presents and lists the details of {CWL} workflows with their inputs, outputs and steps. It also packages the {CWL} files into a downloadable Research Object Bundle including attribution, versioning and dependency metadata in the manifest, allowing it to be easily shared. The tool operates over any workflow held in a {GitHub} repository. Other features include: path visualization from parents and children nodes; nested workflows support; workflow graph download in a range of image formats; a gallery of previously submitted workflows; and support for private git repositories and public {GitHub} including live updates over versioned workflows. The {CWL} Viewer is the de facto {CWL} visualization suite and has been enthusiastically received by the {CWL} community. Project Website: https://view.commonwl.org/. Source Code: https://github.com/common-workflow-language/cwlviewer (https://doi.org/10.5281/zenodo.823535). Software License: Apache License, Version 2.0. Submitted abstract: {CWL} Viewer: The Common Workflow Language Viewer. Poster: https://doi.org/10.7490/f1000research.1114375.1. Technical Report: Reproducible Research using Research Objects (https://doi.org/10.5281/zenodo.823295).}
}
@inproceedings{robinson_2017a,
title = {{CWL} Viewer},
author = {Robinson, Mark and Soiland-Reyes, Stian and Crusoe, Michael R. and Goble, Carole},
url = {https://view.commonwl.org/},
year = {2017},
month = {jul},
day = {6},
urldate = {2017-08-16},
f1000-projects = {{CWLProv} and Your publications},
abstract = {The Common Workflow Language ({CWL}) project emerged from the {BOSC} 2014 Codefest as a grassroots, multi-vendor working group to tackle the portability of data analysis workflows. Its specification for describing workflows and command line tools aims to make them portable and scalable across a variety of computing platforms. At its heart {CWL} is a set of structured text files ({YAML}) with various extensibility points to the format. However, the {CWL} syntax and multi-file collections are not conducive to workflow browsing, exchange and understanding: for this we need a visualization suite. {CWL} Viewer is a richly featured {CWL} visualization suite that graphically presents and lists the details of {CWL} workflows with their inputs, outputs and steps. It also packages the {CWL} files into a downloadable Research Object Bundle including attribution, versioning and dependency metadata in the manifest, allowing it to be easily shared. The tool operates over any workflow held in a {GitHub} repository. Other features include: path visualization from parents and children nodes; nested workflows support; workflow graph download in a range of image formats; a gallery of previously submitted workflows; and support for private git repositories and public {GitHub} including live updates over versioned workflows. The {CWL} Viewer is the de facto {CWL} visualization suite and has been enthusiastically received by the {CWL} community.}
}
@article{mcmurry_2017,
title = {Identifiers for the 21st century: How to design, provision, and reuse persistent identifiers to maximize utility and impact of life science data.},
author = {{McMurry}, Julie A and Juty, Nick and Blomberg, Niklas and Burdett, Tony and Conlin, Tom and Conte, Nathalie and Courtot, Mélanie and Deck, John and Dumontier, Michel and Fellows, Donal K and Gonzalez-Beltran, Alejandra and Gormanns, Philipp and Grethe, Jeffrey and Hastings, Janna and Hériché, Jean-Karim and Hermjakob, Henning and Ison, Jon C and Jimenez, Rafael C and Jupp, Simon and Kunze, John and Laibe, Camille and Le Novère, Nicolas and Malone, James and Martin, Maria Jesus and {McEntyre}, Johanna R and Morris, Chris and Muilu, Juha and Müller, Wolfgang and Rocca-Serra, Philippe and Sansone, Susanna-Assunta and Sariyar, Murat and Snoep, Jacky L and Soiland-Reyes, Stian and Stanford, Natalie J and Swainston, Neil and Washington, Nicole and Williams, Alan R and Wimalaratne, Sarala M and Winfree, Lilly M and Wolstencroft, Katherine and Goble, Carole and Mungall, Christopher J and Haendel, Melissa A and Parkinson, Helen},
pages = {e2001414},
year = {2017},
month = {jun},
day = {29},
urldate = {2018-07-13},
journal = {{PLoS} Biol},
volume = {15},
number = {6},
doi = {10.1371/journal.pbio.2001414},
pmid = {28662064},
pmcid = {PMC5490878},
f1000-projects = {{CWLProv} and Your publications},
abstract = {In many disciplines, data are highly decentralized across thousands of online databases (repositories, registries, and knowledgebases). Wringing value from such databases depends on the discipline of data science and on the humble bricks and mortar that make integration possible; identifiers are a core component of this integration infrastructure. Drawing on our experience and on work by other groups, we outline 10 lessons we have learned about the identifier qualities and best practices that facilitate large-scale data integration. Specifically, we propose actions that identifier practitioners (database providers) should take in the design, provision and reuse of identifiers. We also outline the important considerations for those referencing identifiers in various circumstances, including by authors and data generators. While the importance and relevance of each lesson will vary by context, there is a need for increased awareness about how to avoid and manage common identifier problems, especially those related to persistence and web-accessibility/resolvability. We focus strongly on web-based identifiers in the life sciences; however, the principles are broadly relevant to other disciplines.}
}
@inproceedings{khan_2017,
title = {{CWL}+Research Object == Complete Provenance},
author = {Khan, Farah Zaib and Soiland-Reyes, Stian and Lonie, Andrew and Sinnott, Richard},
url = {https://github.com/common-workflow-language/common-workflow-language/wiki/Research-Object-Proposal},
year = {2017},
month = {jun},
day = {14},
urldate = {2017-08-16},
f1000-projects = {{CWLProv} and Your publications},
abstract = {The term provenance refers to ``the beginning of something's existence; something's origin'' or ``a record of ownership of a work of art or an antique, used as a guide to authenticity or quality''. Provenance tracking is crucial in scientific studies, where workflows have emerged as an exemplar approach to mechanize data-intensive analyses. Gil et al. analyze the challenges of scientific workflows and conclude that a formally specified workflow helps ``accelerate the rate of scientific process'' and facilitates others in reproducing a given experiment, provided that provenance of the end-to-end process is captured at every level. We have implemented an exemplar {GATK} variant calling workflow using three approaches to workflow definition, namely Galaxy, {CWL} and Cpipe, to identify assumptions implicit in these approaches. These assumptions lead to limited or no understanding of reproducibility requirements, due to lack of documentation and comprehensive provenance tracking, and resulted in the identification of provenance information crucial for genomic workflows. {CWL} provides a declarative approach to workflow declaration, making minimal assumptions about the precise software environment, base software dependencies, configuration settings, alteration of parameters and software versions. It aims to provide an open source extensible standard to build flexible and customized workflows including intricate details of every process. It facilitates capture of information by supporting declaration of requirements, `cwl:tool` and checksums, etc. Currently, there is no mechanism to gather the information produced as a result of a workflow run into one bundle for future use. We propose to demonstrate the implementation of such a module for {CWL}.}
}
@inproceedings{chard_2016,
title = {I'll take that to go: Big data bags and minimal identifiers for exchange of large, complex datasets},
author = {Chard, Kyle and D'Arcy, Mike and Heavner, Ben and Foster, Ian and Kesselman, Carl and Madduri, Ravi and Rodriguez, Alexis and Soiland-Reyes, Stian and Goble, Carole and Clark, Kristi and Deutsch, Eric W. and Dinov, Ivo and Price, Nathan and Toga, Arthur},
pages = {319-328},
publisher = {IEEE},
year = {2016},
month = {dec},
day = {5},
urldate = {2018-07-13},
isbn = {978-1-4673-9005-7},
doi = {10.1109/bigdata.2016.7840618},
f1000-projects = {{CWLProv} and Your publications},
abstract = {Big data workflows often require the assembly and exchange of complex, multi-element datasets. For example, in biomedical applications, the input to an analytic pipeline can be a dataset consisting of thousands of images and genome sequences assembled from diverse repositories, requiring a description of the contents of the dataset in a concise and unambiguous form. Typical approaches to creating datasets for big data workflows assume that all data reside in a single location, requiring costly data marshaling and permitting errors of omission and commission because dataset members are not explicitly specified. We address these issues by proposing simple methods and tools for assembling, sharing, and analyzing large and complex datasets that scientists can easily integrate into their daily workflows. These tools combine a simple and robust method for describing data collections ({BDBags}), data descriptions (Research Objects), and simple persistent identifiers (Minids) to create a powerful ecosystem of tools and services for big data analysis and sharing. We present these tools and use biomedical case studies to illustrate their use for the rapid assembly, sharing, and analysis of large datasets.},
booktitle = {2016 {IEEE} International Conference on Big Data (Big Data)}
}
@article{tavernaprov,
author = {Soiland-Reyes, Stian and Alper, Pinar and Goble, Carole},
title = {Tracking workflow execution with {TavernaProv}.},
note = {PROV Three Years Later; workshop at Provenance Week 2016, McLean, Virginia, USA.},
doi = {10.5281/zenodo.51314},
year = {2016},
month = {jun},
day = {6},
}
@book{nies_2014,
title = {{PROV}-Dictionary: Modeling Provenance for Dictionary Data Structures},
author = {De Nies, Tom and Coppens, Sam and Missier, Paolo and Moreau, Luc and Cheney, James and Lebo, Timothy and Soiland-Reyes, Stian},
publisher = {W3C},
year = {2014},
month = {apr},
day = {30},
urldate = {2017-08-16},
f1000-projects = {{CWLProv} and Your publications},
abstract = {Provenance is information about entities, activities, and people involved in producing a piece of data or thing, which can be used to form assessments about its quality, reliability or trustworthiness. This document describes extensions to {PROV} to facilitate the modeling of provenance for dictionary data structures. {PROV}-{DM} specifies a Collection as an entity that provides a structure to some constituents, which are themselves entities. However, some applications may need a mechanism to specify more structure to a Collection, in order to accurately describe its provenance. Therefore, in this document, we introduce Dictionary, a specific type of Collection with a logical structure consisting of key-entity pairs.}
}
@article{ciccarese_2013a,
title = {Web Annotation as a First-Class Object},
author = {Ciccarese, Paolo and Soiland-Reyes, Stian and Clark, Tim},
pages = {71-75},
year = {2013},
month = {nov},
urldate = {2018-07-13},
journal = {{IEEE} Internet Comput},
volume = {17},
number = {6},
issn = {1089-7801},
doi = {10.1109/MIC.2013.123},
f1000-projects = {{CWLProv} and Your publications}
}
@online{moreau_2013,
title = {{PROV}-N: The Provenance Notation},
author = {Moreau, Luc and Missier, Paolo and Cheney, James and Soiland-Reyes, Stian},
year = {2013},
month = {apr},
day = {30},
url = {http://www.w3.org/TR/2013/REC-prov-n-20130430/},
note = {W3C Recommendation 30 April 2013},
f1000-projects = {{CWLProv} and Your publications},
abstract = {Provenance is information about entities, activities, and people involved in producing a piece of data or thing, which can be used to form assessments about its quality, reliability or trustworthiness. {PROV}-{DM} is the conceptual data model that forms a basis for the {W3C} provenance ({PROV}) family of specifications. {PROV}-{DM} distinguishes core structures, forming the essence of provenance information, from extended structures catering for more specific uses of provenance. {PROV}-{DM} is organized in six components, respectively dealing with: (1) entities and activities, and the time at which they were created, used, or ended; (2) derivations of entities from entities; (3) agents bearing responsibility for entities that were generated and activities that happened; (4) a notion of bundle, a mechanism to support provenance of provenance; (5) properties to link entities that refer to the same thing; and (6) collections forming a logical structure for its members. To provide examples of the {PROV} data model, the {PROV} notation ({PROV}-N) is introduced: aimed at human consumption, {PROV}-N allows serializations of {PROV} instances to be created in a compact manner. {PROV}-N facilitates the mapping of the {PROV} data model to concrete syntax, and is used as the basis for a formal semantics of {PROV}. The purpose of this document is to define the {PROV}-N notation.}
}
@online{PROVN,
author = {Moreau, Luc and Missier, Paolo and Cheney, James and Soiland-Reyes, Stian},
title = {PROV-N: The Provenance Notation},
url = {http://www.w3.org/TR/2013/REC-prov-n-20130430/},
year = {2013},
month = {apr},
day = {30},
urldate = {2018-09-22},
note = {W3C Recommendation 30 April 2013}
}
@article{wings2011,
title = {Wings: Intelligent Workflow-Based Design of Computational Experiments},
author = {Gil, Yolanda and Ratnakar, Varun and Kim, Jihie and Gonzalez-Calero, Pedro and Groth, Paul and Moody, Joshua and Deelman, Ewa},
journal = {{IEEE} Intelligent Systems},
year = {2011},
month = {jan},
volume = {26},
number = {1},
pages = {62-72},
issn = {1541-1672},
doi = {10.1109/MIS.2010.9}
}
@inproceedings{belhajjame_2013,
title = {A workflow {PROV}-corpus based on Taverna and Wings},
author = {Belhajjame, Khalid and Zhao, Jun and Garijo, Daniel and Garrido, Aleix and Soiland-Reyes, Stian and Alper, Pinar and Corcho, Oscar},
pages = {331},
publisher = {{ACM} Press},
url = {http://dl.acm.org/citation.cfm?doid=2457317.2457376},
year = {2013},
month = {mar},
day = {18},
urldate = {2018-07-13},
isbn = {9781450315999},
doi = {10.1145/2457317.2457376},
address = {New York, New York, {USA}},
f1000-projects = {{CWLProv} and Your publications},
booktitle = {Proceedings of the Joint {EDBT}/{ICDT} 2013 Workshops on - {EDBT} '13}
}
@inproceedings{belhajjame_2012,
title = {Workflow-centric research objects: First class citizens in scholarly discourse},
author = {Belhajjame, Khalid and Corcho, Oscar and Garijo, Daniel and Zhao, Jun and Missier, Paolo and Newman, David and Palma, Raúl and Bechhofer, Sean and García Cuesta, Esteban and Gómez-Pérez, José Manuel and Klyne, Graham and Page, Kevin and Roos, Marco and Ruiz, José Enrique and Soiland-Reyes, Stian and Verdes-Montenegro, Lourdes and De Roure, David and Goble, Carole A.},
pages = {1-12},
year = {2012},
url = {http://ceur-ws.org/Vol-903/paper-01.pdf},
urldate = {2017-08-16},
booktitle = {Proceedings of the 2nd Workshop on Semantic Publishing ({SePublica 2012})},
journal = {{CEUR} Workshop Proceedings},
volume = {903},
issn = {1613-0073},
series = {{CEUR} Workshop Proceedings}
}
@inproceedings{belhajjame_2011,
title = {Fostering Scientific Workflow Preservation through Discovery of Substitute Services},
author = {Belhajjame, Khalid and Goble, Carole and Soiland-Reyes, Stian and De Roure, David},
pages = {97-104},
publisher = {IEEE},
year = {2011},
month = {dec},
day = {5},
urldate = {2018-07-13},
isbn = {978-1-4577-2163-2},
doi = {10.1109/eScience.2011.22},
f1000-projects = {{CWLProv} and Your publications},
booktitle = {2011 {IEEE} Seventh International Conference on {eScience}}
}
@incollection{missier_2010,
booktitle = {Scientific and Statistical Database Management},
title = {Taverna, Reloaded},
author = {Missier, Paolo and Soiland-Reyes, Stian and Owen, Stuart and Tan, Wei and Nenadic, Alexandra and Dunlop, Ian and Williams, Alan and Oinn, Tom and Goble, Carole},
editor = {Gertz, Michael and Ludäscher, Bertram},
series = {Lecture notes in computer science},
pages = {471-481},
publisher = {Springer Berlin Heidelberg},
year = {2010},
urldate = {2017-08-16},
volume = {6187},
isbn = {978-3-642-13817-1},
issn = {0302-9743},
doi = {10.1007/978-3-642-13818-8\_33},
address = {Berlin, Heidelberg},
f1000-projects = {{CWLProv} and Your publications}
}
@article{garijo_2017,
title = {Abstract, link, publish, exploit: An end to end framework for workflow sharing},
author = {Garijo, Daniel and Gil, Yolanda and Corcho, Oscar},
pages = {271-283},
year = {2017},
month = {oct},
urldate = {2018-07-13},
journal = {Future Generation Computer Systems},
volume = {75},
issn = {0167739X},
doi = {10.1016/j.future.2017.01.008},
f1000-projects = {{CWLProv} and Linked Data},
abstract = {Scientific workflows are increasingly used to manage and share scientific computations and methods to analyze data. A variety of systems have been developed that store the workflows executed and make them part of public repositories. However, workflows are published in the idiosyncratic format of the workflow system used for the creation and execution of the workflows. Browsing, linking and using the stored workflows and their results often becomes a challenge for scientists who may only be familiar with one system. In this paper we present an approach for addressing this issue by publishing and exploiting workflows as data on the Web, with a representation that is independent from the workflow system used to create them. In order to achieve our goal, we follow the Linked Data Principles to publish workflow inputs, intermediate results, outputs and codes; and we reuse and extend well established standards like {W3C} {PROV}. We illustrate our approach by publishing workflows and consuming them with different tools designed to address common scenarios for workflow exploitation.}
}
@article{cohen2017scientific,
title = {Scientific workflows for computational reproducibility in the life sciences: Status, challenges and opportunities},
author = {Cohen-Boulakia, Sarah and Belhajjame, Khalid and Collin, Olivier and Chopard, Jérôme and Froidevaux, Christine and Gaignard, Alban and Hinsen, Konrad and Larmande, Pierre and Le Bras, Yvan and Lemoine, Frédéric and Mareuil, Fabien and Ménager, Hervé and Pradal, Christophe and Blanchet, Christophe},
pages = {284-298},
year = {2017},
month = {oct},
urldate = {2018-07-13},
journal = {Future Generation Computer Systems},
volume = {75},
issn = {0167739X},
doi = {10.1016/j.future.2017.01.012},
f1000-projects = {{CWL} and {CWLProv} and Workflows},
abstract = {With the development of new experimental technologies, biologists are faced with an avalanche of data to be computationally analyzed for scientific advancements and discoveries to emerge. Faced with the complexity of analysis pipelines, the large number of computational tools, and the enormous amount of data to manage, there is compelling evidence that many if not most scientific discoveries will not stand the test of time: increasing the reproducibility of computed results is of paramount importance. The objective we set out in this paper is to place scientific workflows in the context of reproducibility. To do so, we define several kinds of reproducibility that can be reached when scientific workflows are used to perform experiments. We characterize and define the criteria that need to be catered for by reproducibility-friendly scientific workflow systems, and use such criteria to place several representative and widely used workflow systems and companion tools within such a framework. We also discuss the remaining challenges posed by reproducible scientific workflows in the life sciences. Our study was guided by three use cases from the life science domain involving in silico experiments.}
}
@inproceedings{dahuo_2015,
title = {Smart Container: an ontology towards conceptualizing Docker},
author = {Huo, Da and Nabrzyski, Jaroslaw and Vardeman, II, Charles F.},
series = {{CEUR} Workshop Proceedings},
url = {http://ceur-ws.org/Vol-1486/paper\_89.pdf},
year = {2015},
month = {oct},
day = {11},
urldate = {2017-10-04},
volume = {1486},
issn = {1613-0073},
f1000-projects = {{CWLProv} and Linked Data and Reproducibility},
booktitle = {Proceedings of the {ISWC} 2015 Posters \& Demonstrations Track. Co-located with the 14th International Semantic Web Conference ({ISWC}-2015)}
}
@misc{robinson_other_2017,
title = {{CWL} Viewer: The Common Workflow Language viewer},
author = {Robinson, Mark and Soiland-Reyes, Stian and Crusoe, Michael R and Goble, Carole},
year = {2017},
month = {jul},
day = {22},
urldate = {2017-11-03},
volume = {6},
f1000-projects = {{CWL} and {CWLProv} and Debianpaper},
}
@article{kanwal_2017,
title = {Investigating reproducibility and tracking provenance - A genomic workflow case study.},
author = {Kanwal, Sehrish and Khan, Farah Zaib and Lonie, Andrew and Sinnott, Richard O},
pages = {337},
year = {2017},
month = {jul},
day = {12},
urldate = {2017-11-03},
journal = {{BMC} Bioinformatics},
volume = {18},
number = {1},
doi = {10.1186/s12859-017-1747-0},
pmid = {28701218},
pmcid = {PMC5508699}
}