@book{gelman_data_2006,
title = {Data analysis using regression and multilevel/hierarchical models},
publisher = {Cambridge University Press},
author = {Gelman, Andrew and Hill, Jennifer},
year = {2006},
}
@book{gelman_bayesian_2013,
title = {Bayesian {Data} {Analysis}, {Third} {Edition}},
isbn = {978-1-4398-4095-5},
abstract = {Now in its third edition, this classic book is widely considered the leading text on Bayesian methods, lauded for its accessible, practical approach to analyzing data and solving research problems. Bayesian Data Analysis, Third Edition continues to take an applied approach to analysis using up-to-date Bayesian methods. The authors—all leaders in the statistics community—introduce basic concepts from a data-analytic perspective before presenting advanced methods. Throughout the text, numerous worked examples drawn from real applications and research emphasize the use of Bayesian inference in practice. New to the Third Edition Four new chapters on nonparametric modeling Coverage of weakly informative priors and boundary-avoiding priors Updated discussion of cross-validation and predictive information criteria Improved convergence monitoring and effective sample size calculations for iterative simulation Presentations of Hamiltonian Monte Carlo, variational Bayes, and expectation propagation New and revised software code The book can be used in three different ways. For undergraduate students, it introduces Bayesian inference starting from first principles. For graduate students, the text presents effective current approaches to Bayesian modeling and computation in statistics and related fields. For researchers, it provides an assortment of Bayesian methods in applied statistics. Additional materials, including data sets used in the examples, solutions to selected exercises, and software instructions, are available on the book’s web page.},
language = {en},
publisher = {CRC Press},
author = {Gelman, Andrew and Carlin, John B. and Stern, Hal S. and Dunson, David B. and Vehtari, Aki and Rubin, Donald B.},
month = nov,
year = {2013},
keywords = {Mathematics / Probability \& Statistics / General, Computers / Mathematical \& Statistical Software, Psychology / Research \& Methodology},
}
@book{kruschke_doing_2010,
title = {Doing {Bayesian} {Data} {Analysis}: {A} {Tutorial} {Introduction} with {R}},
isbn = {978-0-12-381486-9},
shorttitle = {Doing {Bayesian} {Data} {Analysis}},
abstract = {There is an explosion of interest in Bayesian statistics, primarily because recently created computational methods have finally made Bayesian analysis tractable and accessible to a wide audience. Doing Bayesian Data Analysis, A Tutorial Introduction with R and BUGS, is for first year graduate students or advanced undergraduates and provides an accessible approach, as all mathematics is explained intuitively and with concrete examples. It assumes only algebra and ‘rusty’ calculus. Unlike other textbooks, this book begins with the basics, including essential concepts of probability and random sampling. The book gradually climbs all the way to advanced hierarchical modeling methods for realistic data. The text provides complete examples with the R programming language and BUGS software (both freeware), and begins with basic programming examples, working up gradually to complete programs for complex analyses and presentation graphics. These templates can be easily adapted for a large variety of students and their own research needs. The textbook bridges students from their undergraduate training into modern Bayesian methods. -Accessible, including the basics of essential concepts of probability and random sampling -Examples with R programming language and BUGS software -Comprehensive coverage of all scenarios addressed by non-Bayesian textbooks: t-tests, analysis of variance (ANOVA) and comparisons in ANOVA, multiple regression, and chi-square (contingency table analysis). -Coverage of experiment planning -R and BUGS computer programming code on website -Exercises have explicit purposes and guidelines for accomplishment},
language = {en},
publisher = {Academic Press},
author = {Kruschke, John},
month = nov,
year = {2010},
keywords = {Mathematics / General, Mathematics / Applied},
}
@article{ferrari_beta_2004,
title = {Beta {Regression} for {Modelling} {Rates} and {Proportions}},
volume = {31},
issn = {0266-4763},
url = {http://www.tandfonline.com/doi/abs/10.1080/0266476042000214501},
doi = {10.1080/0266476042000214501},
abstract = {This paper proposes a regression model where the response is beta distributed using a parameterization of the beta law that is indexed by mean and dispersion parameters. The proposed model is useful for situations where the variable of interest is continuous and restricted to the interval (0, 1) and is related to other variables through a regression structure. The regression parameters of the beta regression model are interpretable in terms of the mean of the response and, when the logit link is used, of an odds ratio, unlike the parameters of a linear regression that employs a transformed response. Estimation is performed by maximum likelihood. We provide closed-form expressions for the score function, for Fisher's information matrix and its inverse. Hypothesis testing is performed using approximations obtained from the asymptotic normality of the maximum likelihood estimator. Some diagnostic measures are introduced. Finally, practical applications that employ real data are presented and discussed.},
number = {7},
urldate = {2014-03-13},
journal = {Journal of Applied Statistics},
author = {Ferrari, Silvia and Cribari-Neto, Francisco},
year = {2004},
pages = {799--815},
}
@book{barrett_causal_2024,
title = {Causal {Inference} in {R}},
url = {https://www.r-causal.org/},
language = {en},
urldate = {2023-12-02},
author = {Barrett, Malcolm and McGowan, Lucy D’Agostino and Gerke, Travis},
year = {2024},
}
@book{hastie_elements_2017,
title = {The {Elements} of {Statistical} {Learning}: {Data} {Mining}, {Inference}, and {Prediction}},
edition = {2},
url = {https://hastie.su.domains/ElemStatLearn/},
urldate = {2023-12-02},
author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
year = {2017},
}
@book{goodfellow_deep_2016,
title = {Deep {Learning}},
url = {https://www.deeplearningbook.org/},
urldate = {2023-12-02},
author = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
year = {2016},
}
@misc{scikit-learn_116_2023,
title = {1.16. {Probability} calibration},
url = {https://scikit-learn.org/stable/modules/calibration.html},
abstract = {When performing classification you often want not only to predict the class label, but also obtain a probability of the respective label. This probability gives you some kind of confidence on the p...},
language = {en},
urldate = {2023-12-02},
journal = {scikit-learn},
author = {{scikit-learn}},
year = {2023},
}
@book{james_introduction_2021,
address = {New York, NY},
series = {Springer {Texts} in {Statistics}},
title = {An {Introduction} to {Statistical} {Learning}},
volume = {103},
isbn = {978-1-4614-7137-0 978-1-4614-7138-7},
url = {http://link.springer.com/10.1007/978-1-4614-7138-7},
urldate = {2023-12-02},
publisher = {Springer New York},
author = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert},
year = {2021},
doi = {10.1007/978-1-4614-7138-7},
}
@article{lang_mlr3_2019,
title = {mlr3: {A} modern object-oriented machine learning framework in {R}},
volume = {4},
issn = {2475-9066},
shorttitle = {mlr3},
url = {https://joss.theoj.org/papers/10.21105/joss.01903},
doi = {10.21105/joss.01903},
number = {44},
urldate = {2023-12-02},
journal = {Journal of Open Source Software},
author = {Lang, Michel and Binder, Martin and Richter, Jakob and Schratz, Patrick and Pfisterer, Florian and Coors, Stefan and Au, Quay and Casalicchio, Giuseppe and Kotthoff, Lars and Bischl, Bernd},
month = dec,
year = {2019},
pages = {1903},
}
@book{gelman_regression_2020,
edition = {1},
title = {Regression and {Other} {Stories}},
isbn = {978-1-139-16187-9 978-1-107-02398-7 978-1-107-67651-0},
url = {https://www.cambridge.org/highereducation/product/9781139161879/book},
abstract = {Most textbooks on regression focus on theory and the simplest of examples. Real statistical problems, however, are complex and subtle. This is not a book about the theory of regression. It is about using regression to solve real problems of comparison, estimation, prediction, and causal inference. Unlike other books, it focuses on practical issues such as sample size and missing data and a wide range of goals and techniques. It jumps right in to methods and computer code you can use immediately. Real examples, real stories from the authors' experience demonstrate what regression can do and its limitations, with practical advice for understanding assumptions and implementing methods for experiments and observational studies. They make a smooth transition to logistic regression and GLM. The emphasis is on computation in R and Stan rather than derivations, with code available online. Graphics and presentation aid understanding of the models and model fitting.},
urldate = {2023-12-02},
publisher = {Cambridge University Press},
author = {Gelman, Andrew and Hill, Jennifer and Vehtari, Aki},
month = jul,
year = {2020},
doi = {10.1017/9781139161879},
}
@book{wooldridge_introductory_2012,
address = {Mason, OH},
edition = {5},
title = {Introductory {Econometrics}: {A} {Modern} {Approach}},
isbn = {978-1-111-53104-1},
shorttitle = {Introductory {Econometrics}},
abstract = {Discover how empirical researchers today actually think about and apply econometric methods with the practical, professional approach in Wooldridge's INTRODUCTORY ECONOMETRICS: A MODERN APPROACH, 5E. Unlike traditional books on the subject, INTRODUCTORY ECONOMETRICS' unique presentation demonstrates how econometrics has moved beyond just a set of abstract tools to become a genuinely useful tool for answering questions in business, policy evaluation, and forecasting environments. Organized around the type of data being analyzed, the book uses a systematic approach that only introduces assumptions as they are needed, which makes the material easier to understand and ultimately leads to better econometric practices. Packed with timely, relevant applications, the text incorporates close to 100 intriguing data sets in six formats and offers updates that reflect the latest emerging developments in the field.},
language = {English},
publisher = {Cengage Learning},
author = {Wooldridge, Jeffrey M.},
month = sep,
year = {2012},
}
@misc{google_machine_2023,
title = {Machine {Learning} {\textbar} {Google} for {Developers}},
url = {https://developers.google.com/machine-learning},
abstract = {Educational resources for machine learning.},
language = {en},
urldate = {2023-12-02},
author = {Google},
year = {2023},
}
@article{rovine_peirce_2004,
title = {Peirce and {Bowditch}},
volume = {58},
issn = {0003-1305},
url = {https://doi.org/10.1198/000313004X964},
doi = {10.1198/000313004X964},
abstract = {Henry Pickering Bowditch and Charles Sanders Peirce made important contributions to the ideas of regression and correlation. This is particularly interesting as these contributions came well before the work of Galton and Pearson. This article discusses the work of Bowditch related to the development of regression and presents Peirce's coefficient of the science of the method, an association coefficient for a 2 × 2 contingency table.},
number = {3},
urldate = {2023-12-02},
journal = {The American Statistician},
author = {Rovine, Michael J and Anderson, Douglas R},
month = aug,
year = {2004},
note = {Publisher: Taylor \& Francis},
keywords = {Association, Coefficient, History of statistics},
pages = {232--236},
}
@book{grolemund_welcome_2023,
title = {{R} for {Data} {Science}},
url = {https://r4ds.hadley.nz/},
abstract = {This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it. In this book, you will find a practicum of skills for data science. Just as a chemist learns how to clean test tubes and stock a lab, you’ll learn how to clean data and draw plots—and many other things besides. These are the skills that allow data science to happen, and here you will find the best practices for doing each of these things with R. You’ll learn how to use the grammar of graphics, literate programming, and reproducible research to save time. You’ll also learn how to manage cognitive resources to facilitate discoveries when wrangling, visualising, and exploring data.},
language = {en},
urldate = {2023-12-02},
author = {Wickham, Hadley and Grolemund, Garrett},
year = {2023},
}
@book{greene_econometric_2017,
title = {Econometric {Analysis}},
edition = {8},
url = {https://pages.stern.nyu.edu/~wgreene/Text/econometricanalysis.htm},
urldate = {2023-12-02},
author = {Greene, William},
year = {2017},
}
@misc{brownlee_gentle_2016,
title = {Gentle {Introduction} to the {Bias}-{Variance} {Trade}-{Off} in {Machine} {Learning}},
url = {https://machinelearningmastery.com/gentle-introduction-to-the-bias-variance-trade-off-in-machine-learning/},
abstract = {Supervised machine learning algorithms can best be understood through the lens of the bias-variance trade-off. In this post, you will discover the Bias-Variance Trade-Off and how to use it to better understand machine learning algorithms and get better performance on your data. Let’s get started. Update Oct/2019: Removed discussion of parametric/nonparametric models (thanks Alex). Overview […]},
language = {en-US},
urldate = {2023-12-03},
journal = {MachineLearningMastery.com},
author = {Brownlee, Jason},
month = mar,
year = {2016},
}
@misc{google_introduction_2023,
title = {Introduction {\textbar} {Machine} {Learning}},
url = {https://developers.google.com/machine-learning/decision-forests},
language = {en},
urldate = {2023-12-03},
journal = {Google for Developers},
author = {Google},
year = {2023},
}
@misc{scikit-learn_nested_2023,
title = {Nested versus non-nested cross-validation},
url = {https://scikit-learn.org/stable/auto_examples/model_selection/plot_nested_cross_validation_iris.html},
abstract = {This example compares non-nested and nested cross-validation strategies on a classifier of the iris data set. Nested cross-validation (CV) is often used to train a model in which hyperparameters al...},
language = {en},
urldate = {2023-12-03},
journal = {scikit-learn},
author = {{scikit-learn}},
year = {2023},
}
@misc{schmidhuber_annotated_2022,
title = {Annotated {History} of {Modern} {AI} and {Deep} {Learning}},
url = {http://arxiv.org/abs/2212.11279},
doi = {10.48550/arXiv.2212.11279},
abstract = {Machine learning is the science of credit assignment: finding patterns in observations that predict the consequences of actions and help to improve future performance. Credit assignment is also required for human understanding of how the world works, not only for individuals navigating daily life, but also for academic professionals like historians who interpret the present in light of past events. Here I focus on the history of modern artificial intelligence (AI) which is dominated by artificial neural networks (NNs) and deep learning, both conceptually closer to the old field of cybernetics than to what's been called AI since 1956 (e.g., expert systems and logic programming). A modern history of AI will emphasize breakthroughs outside of the focus of traditional AI text books, in particular, mathematical foundations of today's NNs such as the chain rule (1676), the first NNs (linear regression, circa 1800), and the first working deep learners (1965-). From the perspective of 2022, I provide a timeline of the -- in hindsight -- most important relevant events in the history of NNs, deep learning, AI, computer science, and mathematics in general, crediting those who laid foundations of the field. The text contains numerous hyperlinks to relevant overview sites from my AI Blog. It supplements my previous deep learning survey (2015) which provides hundreds of additional references. Finally, to round it off, I'll put things in a broader historic context spanning the time since the Big Bang until when the universe will be many times older than it is now.},
urldate = {2023-12-03},
publisher = {arXiv},
author = {Schmidhuber, Juergen},
month = dec,
year = {2022},
note = {arXiv:2212.11279 [cs]},
keywords = {Computer Science - Neural and Evolutionary Computing},
annote = {Comment: 75 pages, over 500 references. arXiv admin note: substantial text overlap with arXiv:2005.05744},
}
@article{welchowski_techniques_2022,
title = {Techniques to {Improve} {Ecological} {Interpretability} of {Black}-{Box} {Machine} {Learning} {Models}},
volume = {27},
issn = {1537-2693},
url = {https://doi.org/10.1007/s13253-021-00479-7},
doi = {10.1007/s13253-021-00479-7},
abstract = {Statistical modeling of ecological data is often faced with a large number of variables as well as possible nonlinear relationships and higher-order interaction effects. Gradient boosted trees (GBT) have been successful in addressing these issues and have shown a good predictive performance in modeling nonlinear relationships, in particular in classification settings with a categorical response variable. They also tend to be robust against outliers. However, their black-box nature makes it difficult to interpret these models. We introduce several recently developed statistical tools to the environmental research community in order to advance interpretation of these black-box models. To analyze the properties of the tools, we applied gradient boosted trees to investigate biological health of streams within the contiguous USA, as measured by a benthic macroinvertebrate biotic index. Based on these data and a simulation study, we demonstrate the advantages and limitations of partial dependence plots (PDP), individual conditional expectation (ICE) curves and accumulated local effects (ALE) in their ability to identify covariate–response relationships. Additionally, interaction effects were quantified according to interaction strength (IAS) and Friedman’s $H^2$ statistic. Interpretable machine learning techniques are useful tools to open the black-box of gradient boosted trees in the environmental sciences. This finding is supported by our case study on the effect of impervious surface on the benthic condition, which agrees with previous results in the literature. Overall, the most important variables were ecoregion, bed stability, watershed area, riparian vegetation and catchment slope. These variables were also present in most identified interaction effects. In conclusion, graphical tools (PDP, ICE, ALE) enable visualization and easier interpretation of GBT but should be supported by analytical statistical measures. Future methodological research is needed to investigate the properties of interaction tests. Supplementary materials accompanying this paper appear on-line.},
language = {en},
number = {1},
urldate = {2023-12-03},
journal = {Journal of Agricultural, Biological and Environmental Statistics},
author = {Welchowski, Thomas and Maloney, Kelly O. and Mitchell, Richard and Schmid, Matthias},
month = mar,
year = {2022},
keywords = {Boosting, Interaction terms, Interpretable machine learning, Macroinvertebrates, Stream health},
pages = {175--197},
}
@book{biecek_explanatory_2020,
title = {Explanatory {Model} {Analysis}},
url = {https://ema.drwhy.ai/},
abstract = {This book introduces a unified language for exploration, explanation and examination of predictive machine learning models.},
urldate = {2023-12-03},
author = {Biecek, Przemyslaw and Burzykowski, Tomasz},
year = {2020},
}
@misc{bycroft_llm_2023,
title = {{LLM} {Visualization}},
url = {https://bbycroft.net/llm},
urldate = {2023-12-03},
author = {Bycroft, Brendan},
year = {2023},
}
@article{kunzel_metalearners_2019,
title = {Metalearners for estimating heterogeneous treatment effects using machine learning},
volume = {116},
url = {https://www.pnas.org/doi/abs/10.1073/pnas.1804597116},
doi = {10.1073/pnas.1804597116},
abstract = {There is growing interest in estimating and analyzing heterogeneous treatment effects in experimental and observational studies. We describe a number of metaalgorithms that can take advantage of any supervised learning or regression method in machine learning and statistics to estimate the conditional average treatment effect (CATE) function. Metaalgorithms build on base algorithms—such as random forests (RFs), Bayesian additive regression trees (BARTs), or neural networks—to estimate the CATE, a function that the base algorithms are not designed to estimate directly. We introduce a metaalgorithm, the X-learner, that is provably efficient when the number of units in one treatment group is much larger than in the other and can exploit structural properties of the CATE function. For example, if the CATE function is linear and the response functions in treatment and control are Lipschitz-continuous, the X-learner can still achieve the parametric rate under regularity conditions. We then introduce versions of the X-learner that use RF and BART as base learners. In extensive simulation studies, the X-learner performs favorably, although none of the metalearners is uniformly the best. In two persuasion field experiments from political science, we demonstrate how our X-learner can be used to target treatment regimes and to shed light on underlying mechanisms. A software package is provided that implements our methods.},
number = {10},
urldate = {2023-12-09},
journal = {Proceedings of the National Academy of Sciences},
author = {Künzel, Sören R. and Sekhon, Jasjeet S. and Bickel, Peter J. and Yu, Bin},
month = mar,
year = {2019},
pages = {4156--4165},
}
@article{pearl_causal_2009,
title = {Causal inference in statistics: {An} overview},
volume = {3},
issn = {1935-7516},
shorttitle = {Causal inference in statistics},
url = {https://projecteuclid.org/journals/statistics-surveys/volume-3/issue-none/Causal-inference-in-statistics-An-overview/10.1214/09-SS057.full},
doi = {10.1214/09-SS057},
abstract = {This review presents empirical researchers with recent advances in causal inference, and stresses the paradigmatic shifts that must be undertaken in moving from traditional statistical analysis to causal analysis of multivariate data. Special emphasis is placed on the assumptions that underly all causal inferences, the languages used in formulating those assumptions, the conditional nature of all causal and counterfactual claims, and the methods that have been developed for the assessment of such claims. These advances are illustrated using a general theory of causation based on the Structural Causal Model (SCM) described in Pearl (2000a), which subsumes and unifies other approaches to causation, and provides a coherent mathematical foundation for the analysis of causes and counterfactuals. In particular, the paper surveys the development of mathematical tools for inferring (from a combination of data and assumptions) answers to three types of causal queries: (1) queries about the effects of potential interventions, (also called “causal effects” or “policy evaluation”) (2) queries about probabilities of counterfactuals, (including assessment of “regret,” “attribution” or “causes of effects”) and (3) queries about direct and indirect effects (also known as “mediation”). Finally, the paper defines the formal and conceptual relationships between the structural and potential-outcome frameworks and presents tools for a symbiotic analysis that uses the strong features of both.},
urldate = {2023-12-10},
journal = {Statistics Surveys},
author = {Pearl, Judea},
month = jan,
year = {2009},
note = {Publisher: Amer. Statist. Assoc., the Bernoulli Soc., the Inst. Math. Statist., and the Statist. Soc. Canada},
keywords = {causal effects, causes of effects, confounding, counterfactuals, graphical methods, mediation, policy evaluation, potential-outcome, structural equation models},
pages = {96--146},
}
@misc{pearl_causal_2022,
title = {Causal {Inference}: {History}, {Perspectives}, {Adventures}, and {Unification} ({An} {Interview} with {Judea} {Pearl})},
url = {https://muse.jhu.edu/pub/56/article/867087/summary},
urldate = {2023-12-10},
author = {Pearl, Judea},
year = {2022},
}
@book{morgan_counterfactuals_2014,
title = {Counterfactuals and {Causal} {Inference}: {Methods} and {Principles} for {Social} {Research}, 2nd {Edition}},
shorttitle = {Counterfactuals and {Causal} {Inference}},
url = {https://stars.library.ucf.edu/etextbooks/298},
author = {Morgan, Stephen and Winship, Christopher},
month = jan,
year = {2014},
file = {"Counterfactuals and Causal Inference\: Methods and Principles for Socia" by Stephen L. Morgan and Christopher Winship:/Users/micl/Zotero/storage/U74QKHFA/298.html:text/html},
}
@misc{facure_alves_causal_2022,
title = {Causal {Inference} for the {Brave} and {True}},
url = {https://matheusfacure.github.io/python-causality-handbook/landing-page.html},
urldate = {2023-12-10},
author = {Facure Alves, Matheus},
year = {2022},
}
@book{molnar_interpretable_2023,
title = {Interpretable {Machine} {Learning}},
url = {https://christophm.github.io/interpretable-ml-book/},
abstract = {Machine learning algorithms usually operate as black boxes and it is unclear how they derived a certain decision. This book is a guide for practitioners to make machine learning decisions interpretable.},
urldate = {2023-12-10},
author = {Molnar, Christoph},
year = {2023},
}
@misc{pok_how_2020,
title = {How uplift modeling works},
url = {https://ambiata.com/blog/2020-07-07-uplift-modeling/},
urldate = {2023-12-10},
author = {Pok, Wilson},
year = {2020},
}
@misc{shevchenko_types_2023,
title = {Types of customers — scikit-uplift documentation},
url = {https://www.uplift-modeling.com/en/v0.5.1/user_guide/introduction/clients.html},
urldate = {2023-12-10},
author = {Shevchenko, Maksim},
year = {2023},
}
@misc{zhang_dive_2023,
title = {Dive into {Deep} {Learning}},
url = {https://d2l.ai/index.html},
urldate = {2023-12-10},
author = {Zhang, Aston and Lipton, Zachary C. and Li, Mu and Smola, Alexander J.},
year = {2023},
}
@misc{vanderplas_python_2016,
title = {Python {Data} {Science} {Handbook}},
url = {https://www.oreilly.com/library/view/python-data-science/9781491912126/},
abstract = {For many researchers, Python is a first-class tool mainly because of its libraries for storing, manipulating, and gaining insight from data. Several resources exist for individual pieces of this data …},
language = {en},
urldate = {2023-12-10},
author = {VanderPlas, Jake},
year = {2016},
note = {ISBN: 9781491912058},
}
@book{cunningham_causal_2023,
title = {Causal {Inference}: {The} {Mixtape}},
url = {https://mixtape.scunning.com/},
urldate = {2023-12-10},
author = {Cunningham, Scott},
year = {2023},
}
@misc{causalml_causalml_2023,
title = {{CausalML}},
url = {https://causalml.readthedocs.io/en/latest/index.html},
urldate = {2023-12-10},
author = {{causalml}},
year = {2023},
}
@misc{masis_interpretable_2023,
title = {Interpretable {Machine} {Learning} with {Python} - {Second} {Edition}},
url = {https://www.packtpub.com/product/interpretable-machine-learning-with-python-second-edition/9781803235424},
abstract = {A deep dive into the key aspects and challenges of machine learning interpretability using a comprehensive toolkit, including SHAP, feature importance, and causal inference, to build fairer, safer, and more reliable models.},
language = {en},
urldate = {2023-12-10},
journal = {Packt},
author = {Masis, Serg},
year = {2023},
}
@misc{faraway_linear_2014,
title = {Linear {Models} with {R}},
url = {https://www.routledge.com/Linear-Models-with-R/Faraway/p/book/9781439887332},
abstract = {A Hands-On Way to Learning Data Analysis
Part of the core of statistics, linear models are used to make predictions and explain the relationship between the response and the predictors. Understanding linear models is crucial to a broader competence in the practice of statistics. Linear Models with R, Second Edition explains how to use linear models in physical science, engineering, social science, and business applications. The book incorporates several improvements that reflect how the world o},
language = {en},
urldate = {2023-12-10},
journal = {Routledge \& CRC Press},
author = {Faraway, Julian},
year = {2014},
}
@book{wood_generalized_2017,
address = {Boca Raton},
edition = {2},
title = {Generalized {Additive} {Models}: {An} {Introduction} with {R}, {Second} {Edition}},
isbn = {978-1-315-37027-9},
shorttitle = {Generalized {Additive} {Models}},
abstract = {The first edition of this book has established itself as one of the leading references on generalized additive models (GAMs), and the only book on the topic to be introductory in nature with a wealth of practical examples and software implementation. It is self-contained, providing the necessary background in linear models, linear mixed models, and generalized linear models (GLMs), before presenting a balanced treatment of the theory and applications of GAMs and related models.
The author bases his approach on a framework of penalized regression splines, and while firmly focused on the practical aspects of GAMs, discussions include fairly full explanations of the theory underlying the methods. Use of R software helps explain the theory and illustrates the practical application of the methodology. Each chapter contains an extensive set of exercises, with solutions in an appendix or in the book’s R data package gamair, to enable use as a course text or for self-study.},
publisher = {Chapman and Hall/CRC},
author = {Wood, Simon N.},
month = may,
year = {2017},
doi = {10.1201/9781315370279},
}
@book{harrell_regression_2015,
address = {Cham},
edition = {2},
series = {Springer {Series} in {Statistics}},
title = {Regression {Modeling} {Strategies}: {With} {Applications} to {Linear} {Models}, {Logistic} and {Ordinal} {Regression}, and {Survival} {Analysis}},
isbn = {978-3-319-19424-0 978-3-319-19425-7},
shorttitle = {Regression {Modeling} {Strategies}},
url = {https://link.springer.com/10.1007/978-3-319-19425-7},
language = {en},
urldate = {2023-12-10},
publisher = {Springer International Publishing},
author = {Harrell, Frank E.},
year = {2015},
doi = {10.1007/978-3-319-19425-7},
keywords = {Regression analysis, Generalized least squares, knitr reproducible documents, Linear models, Logistic regression, Predictive modeling, R statistical software, Survival analysis},
}
@misc{gelman_what_2013,
title = {What are the key assumptions of linear regression? {\textbar} {Statistical} {Modeling}, {Causal} {Inference}, and {Social} {Science}},
url = {https://statmodeling.stat.columbia.edu/2013/08/04/19470/},
urldate = {2023-12-10},
author = {Gelman, Andrew},
year = {2013},
}
@book{kuhn_tidy_2023,
title = {Tidy {Modeling} with {R}},
url = {https://www.tmwr.org/},
abstract = {The tidymodels framework is a collection of R packages for modeling and machine learning using tidyverse principles. This book provides a thorough introduction to how to use tidymodels, and an outline of good methodology and statistical practice for phases of the modeling process.},
urldate = {2023-12-10},
author = {Kuhn, Max and Silge, Julia},
year = {2023},
}
@misc{ucla_advanced_research_computing_faq_2023,
title = {{FAQ}: {What} are pseudo {R}-squareds?},
url = {https://stats.oarc.ucla.edu/other/mult-pkg/faq/general/faq-what-are-pseudo-r-squareds/},
urldate = {2023-12-11},
author = {{UCLA Advanced Research Computing}},
year = {2023},
}
@book{roback_beyond_2021,
title = {Beyond {Multiple} {Linear} {Regression}},
url = {https://bookdown.org/roback/bookdown-BeyondMLR/},
abstract = {An applied textbook on generalized linear models and multilevel models for advanced undergraduates, featuring many real, unique data sets. It is intended to be accessible to undergraduate students who have successfully completed a regression course. Even though there is no mathematical prerequisite, we still introduce fairly sophisticated topics such as likelihood theory, zero-inflated Poisson, and parametric bootstrapping in an intuitive and applied manner. We believe strongly in case studies featuring real data and real research questions; thus, most of the data in the textbook arises from collaborative research conducted by the authors and their students, or from student projects. Our goal is that, after working through this material, students will develop an expanded toolkit and a greater appreciation for the wider world of data and statistical modeling.},
urldate = {2023-12-12},
author = {Roback, Paul and Legler, Julie},
year = {2021},
}
@book{clark_mixed_2023,
title = {Mixed {Models} with {R}},
url = {https://m-clark.github.io/mixed-models-with-R/},
abstract = {This is an introduction to using mixed models in R. It covers the most common techniques employed, with demonstration primarily via the lme4 package. Discussion includes extensions into generalized mixed models, Bayesian approaches, and realms beyond.},
urldate = {2023-12-12},
author = {Clark, Michael},
year = {2023},
}
@book{clark_generalized_2022,
title = {Generalized {Additive} {Models}},
url = {https://m-clark.github.io/generalized-additive-models/},
abstract = {An introduction to generalized additive models (GAMs) is provided, with an emphasis on generalization from familiar linear models. It makes extensive use of the mgcv package in R. Discussion includes common approaches, standard extensions, and relations to other techniques. More technical modeling details are described and demonstrated as well.},
urldate = {2023-12-12},
author = {Clark, Michael},
year = {2022},
}
@book{clark_practical_2020,
title = {Practical {Data} {Science}},
url = {https://m-clark.github.io/data-processing-and-visualization/},
abstract = {The focus of this document is on data science tools and techniques in R, including basic programming knowledge, visualization practices, modeling, and more, along with exercises to practice further. In addition, the demonstrations of most content in Python is available via Jupyter notebooks.},
urldate = {2023-12-12},
author = {Clark, Michael},
year = {2020},
}
@book{clark_bayesian_2022,
title = {Bayesian {Basics}},
url = {https://m-clark.github.io/bayesian-basics/},
abstract = {This document provides an introduction to Bayesian data analysis. It is conceptual in nature, but uses the probabilistic programming language Stan for demonstration (and its implementation in R via rstan). From elementary examples, guidance is provided for data preparation, efficient modeling, diagnostics, and more.},
urldate = {2023-12-12},
author = {Clark, Michael},
year = {2022},
}
@book{koenker_quantile_2005,
title = {Quantile regression},
volume = {38},
url = {https://books.google.com/books?hl=en&lr=&id=WjOdAgAAQBAJ&oi=fnd&pg=PT12&dq=koenker+quantile+regression&ots=CQFHSt5o-W&sig=G1TpKPHo-BRdJ8qWcBrIBI2FQAs},
urldate = {2023-12-14},
publisher = {Cambridge University Press},
author = {Koenker, Roger},
year = {2005},
}
@article{koenker_galton_2000,
title = {Galton, {Edgeworth}, {Frisch}, and prospects for quantile regression in econometrics},
volume = {95},
issn = {0304-4076},
url = {https://www.sciencedirect.com/science/article/pii/S0304407699000433},
doi = {10.1016/S0304-4076(99)00043-3},
abstract = {The work of three leading figures in the early history of econometrics is used to motivate some recent developments in the theory and application of quantile regression. We stress not only the robustness advantages of this form of semiparametric statistical method, but also the opportunity to recover a more complete description of the statistical relationship between variables. A recent proposal for a more X-robust form of quantile regression based on maximal depth ideas is described along with an interesting historical antecedent. Finally, the notorious computational burden of median regression, and quantile regression more generally, is addressed. It is argued that recent developments in interior point methods for linear programming together with some new preprocessing ideas make it possible to compute quantile regressions as quickly as least-squares regressions throughout the entire range of problem sizes encountered in econometrics.},
number = {2},
urldate = {2023-12-14},
journal = {Journal of Econometrics},
author = {Koenker, Roger},
month = apr,
year = {2000},
keywords = {Interior point methods, Least absolute error regression, Linear programming, Quantile regression, Regression depth},
pages = {347--374},
}
@misc{wikipedia_relationships_2023,
title = {Relationships among probability distributions},
copyright = {Creative Commons Attribution-ShareAlike License},
url = {https://en.wikipedia.org/wiki/Relationships_among_probability_distributions},
abstract = {In probability theory and statistics, there are several relationships among probability distributions. These relations can be categorized in the following groups:
One distribution is a special case of another with a broader parameter space
Transforms (function of a random variable);
Combinations (function of several variables);
Approximation (limit) relationships;
Compound relationships (useful for Bayesian inference);
Duality;
Conjugate priors.},
language = {en},
urldate = {2023-12-17},
journal = {Wikipedia},
author = {Wikipedia},
month = oct,
year = {2023},
note = {Page Version ID: 1180084573},
}
@misc{murphy_machine_2012,
title = {Machine {Learning}: {A} {Probabilistic} {Perspective}},
url = {https://mitpress.mit.edu/9780262018029/machine-learning/},
abstract = {A comprehensive introduction to machine learning that uses probabilistic models and inference as a unifying approach. Today's Web-enabled deluge of electronic...},
language = {en-US},
urldate = {2023-12-17},
journal = {MIT Press},
author = {Murphy, Kevin P.},
year = {2012},
}
@misc{murphy_probabilistic_2023,
title = {Probabilistic {Machine} {Learning}},
url = {https://mitpress.mit.edu/9780262046824/probabilistic-machine-learning/},
abstract = {A detailed and up-to-date introduction to machine learning, presented through the unifying lens of probabilistic modeling and Bayesian decision theory. This b...},
language = {en-US},
urldate = {2023-12-17},
journal = {MIT Press},
author = {Murphy, Kevin P.},
year = {2023},
}
@misc{databricks_what_2019,
title = {What is {AdaGrad}?},
url = {https://www.databricks.com/glossary/adagrad},
abstract = {Adaptive Gradient Algorithm (Adagrad) is an algorithm for gradient-based optimization and is well-suited when dealing with sparse data.},
language = {en-US},
urldate = {2023-12-18},
journal = {Databricks},
author = {DataBricks},
month = feb,
year = {2019},
}
@misc{statquest_with_josh_starmer_gradient_2019,
title = {Gradient {Descent}, {Step}-by-{Step}},
url = {https://www.youtube.com/watch?v=sDv4f4s2SB8},
abstract = {Gradient Descent is the workhorse behind most of Machine Learning. When you fit a machine learning method to a training dataset, you're probably using Gradient Descent. It can optimize parameters in a wide variety of settings. Since it's so fundamental to Machine Learning, I decided to make a "step-by-step" video that shows you exactly how it works. NOTE: This video assumes you are already familiar with Least Squares and Linear Regression.},
urldate = {2023-12-18},
author = {{StatQuest with Josh Starmer}},
month = feb,
year = {2019},
}
@misc{statquest_with_josh_starmer_stochastic_2019,
title = {Stochastic {Gradient} {Descent}, {Clearly} {Explained}!!!},
url = {https://www.youtube.com/watch?v=vMh0zPT0tLI},
abstract = {Even though Stochastic Gradient Descent sounds fancy, it is just a simple addition to "regular" Gradient Descent. This video sets up the problem that Stochastic Gradient Descent solves and then shows how it does it. Along the way, we discuss situations where Stochastic Gradient Descent is most useful, and some cool features that aren't that obvious. Corrections: at 9:03, the values for the intercept and slope should be the most recent estimates, 0.86 and 0.68, instead of the original random values, 0 and 1; at 9:33, the slope should be 0.7.},
urldate = {2023-12-18},
author = {{StatQuest with Josh Starmer}},
month = may,
year = {2019},
}
@misc{brownlee_gradient_2021,
title = {Gradient {Descent} {With} {AdaGrad} {From} {Scratch}},
url = {https://machinelearningmastery.com/gradient-descent-with-adagrad-from-scratch/},
abstract = {Gradient descent is an optimization algorithm that follows the negative gradient of an objective function in order to locate the minimum of the function. A limitation of gradient descent is that it uses the same step size (learning rate) for each input variable. This can be a problem on objective functions that have different amounts […]},
language = {en-US},
urldate = {2023-12-18},
journal = {MachineLearningMastery.com},
author = {Brownlee, Jason},
month = jun,
year = {2021},
}
@misc{carpenter_prior_2023,
title = {Prior {Choice} {Recommendations}},
url = {https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations},
abstract = {Stan development repository. The master branch contains the current release. The develop branch contains the latest stable development. See the Developer Process Wiki for details.},
language = {en},
urldate = {2023-12-18},
journal = {GitHub},
author = {Carpenter, Bob},
year = {2023},
}
@misc{mcelreath_statistical_2020,
title = {Statistical {Rethinking}: {A} {Bayesian} {Course} with {Examples} in {R} and {STAN}},
shorttitle = {Statistical {Rethinking}},
url = {https://www.routledge.com/Statistical-Rethinking-A-Bayesian-Course-with-Examples-in-R-and-STAN/McElreath/p/book/9780367139919},
abstract = {Statistical Rethinking: A Bayesian Course with Examples in R and Stan builds your knowledge of and confidence in making inferences from data. Reflecting the need for scripting in today's model-based statistics, the book pushes you to perform step-by-step calculations that are usually automated. This unique computational approach ensures that you understand enough of the details to make reasonable choices and interpretations in your own modeling work.
The text presents causal inference and gener},
language = {en},
urldate = {2023-12-18},
journal = {Routledge \& CRC Press},
author = {McElreath, Richard},
year = {2020},
}
@book{kuhn_applied_2023,
title = {Applied {Machine} {Learning} for {Tabular} {Data}},
url = {https://aml4td.org/},
language = {en},
urldate = {2023-12-21},
author = {Kuhn, Max and Johnson, Kjell},
month = dec,
year = {2023},
}
@book{fleuret_little_2023,
title = {The {Little} {Book} of {Deep} {Learning}},
url = {https://fleuret.org/francois/lbdl.html},
abstract = {This book is a short introduction to deep learning for readers with a STEM background},
urldate = {2023-12-28},
author = {Fleuret, François},
year = {2023},
}
@article{belkin_reconciling_2019,
title = {Reconciling modern machine learning practice and the bias-variance trade-off},
volume = {116},
issn = {0027-8424, 1091-6490},
url = {http://arxiv.org/abs/1812.11118},
doi = {10.1073/pnas.1903070116},
abstract = {Breakthroughs in machine learning are rapidly changing science and society, yet our fundamental understanding of this technology has lagged far behind. Indeed, one of the central tenets of the field, the bias-variance trade-off, appears to be at odds with the observed behavior of methods used in the modern machine learning practice. The bias-variance trade-off implies that a model should balance under-fitting and over-fitting: rich enough to express underlying structure in data, simple enough to avoid fitting spurious patterns. However, in the modern practice, very rich models such as neural networks are trained to exactly fit (i.e., interpolate) the data. Classically, such models would be considered over-fit, and yet they often obtain high accuracy on test data. This apparent contradiction has raised questions about the mathematical foundations of machine learning and their relevance to practitioners. In this paper, we reconcile the classical understanding and the modern practice within a unified performance curve. This "double descent" curve subsumes the textbook U-shaped bias-variance trade-off curve by showing how increasing model capacity beyond the point of interpolation results in improved performance. We provide evidence for the existence and ubiquity of double descent for a wide spectrum of models and datasets, and we posit a mechanism for its emergence. This connection between the performance and the structure of machine learning models delineates the limits of classical analyses, and has implications for both the theory and practice of machine learning.},
number = {32},
urldate = {2023-12-28},
journal = {Proceedings of the National Academy of Sciences},
author = {Belkin, Mikhail and Hsu, Daniel and Ma, Siyuan and Mandal, Soumik},
month = aug,
year = {2019},
note = {arXiv:1812.11118 [cs, stat]},
keywords = {Statistics - Machine Learning, Computer Science - Machine Learning},
pages = {15849--15854},
file = {arXiv Fulltext PDF:/Users/micl/Zotero/storage/2W6R7FQB/Belkin et al. - 2019 - Reconciling modern machine learning practice and t.pdf:application/pdf;arXiv.org Snapshot:/Users/micl/Zotero/storage/LZXDXDDD/1812.html:text/html},
}
@misc{bai_understanding_2021,
title = {Understanding the {Under}-{Coverage} {Bias} in {Uncertainty} {Estimation}},
url = {http://arxiv.org/abs/2106.05515},
doi = {10.48550/arXiv.2106.05515},
abstract = {Estimating the data uncertainty in regression tasks is often done by learning a quantile function or a prediction interval of the true label conditioned on the input. It is frequently observed that quantile regression -- a vanilla algorithm for learning quantiles with asymptotic guarantees -- tends to \emph{under-cover} relative to the desired coverage level in reality. While various fixes have been proposed, a more fundamental understanding of why this under-coverage bias happens in the first place remains elusive. In this paper, we present a rigorous theoretical study on the coverage of uncertainty estimation algorithms in learning quantiles. We prove that quantile regression suffers from an inherent under-coverage bias, in a vanilla setting where we learn a realizable linear quantile function and there is more data than parameters. More quantitatively, for $\alpha > 0.5$ and small $d/n$, the $\alpha$-quantile learned by quantile regression roughly achieves coverage $\alpha - (\alpha - 1/2) \cdot d/n$ regardless of the noise distribution, where $d$ is the input dimension and $n$ is the number of training data. Our theory reveals that this under-coverage bias stems from a certain high-dimensional parameter estimation error that is not implied by existing theories on quantile regression. Experiments on simulated and real data verify our theory and further illustrate the effect of various factors such as sample size and model capacity on the under-coverage bias in more practical setups.},
urldate = {2023-12-29},
publisher = {arXiv},
author = {Bai, Yu and Mei, Song and Wang, Huan and Xiong, Caiming},
month = jun,
year = {2021},
note = {arXiv:2106.05515 [cs, math, stat]},
keywords = {Mathematics - Statistics Theory, Statistics - Machine Learning, Computer Science - Machine Learning},
file = {arXiv Fulltext PDF:/Users/micl/Zotero/storage/C9GNLBYX/Bai et al. - 2021 - Understanding the Under-Coverage Bias in Uncertain.pdf:application/pdf;arXiv.org Snapshot:/Users/micl/Zotero/storage/BZ9YWUQY/2106.html:text/html},
}
@book{cohen_statistical_2009,
address = {New York, NY},
edition = {2nd ed., reprint},
title = {Statistical power analysis for the behavioral sciences},
isbn = {978-0-8058-0283-2},
abstract = {Statistical Power Analysis for the Behavioral Sciences, Revised Edition emphasizes the importance of statistical power analysis. This edition discusses the concepts and types of power analysis, t test for means, significance of a product moment rs, and differences between correlation coefficients. The test that a proportion is .50 and sign test, differences between proportions, and chi-square tests for goodness of fit and contingency tables are also elaborated. This text likewise covers the F tests of variance proportions in multiple regression/correlation analysis and computational procedures. This publication is intended for behavioral and biosocial scientists who use statistical inference, but also serves as a supplementary textbook for intermediate level courses in applied statistics in behavioral/biosocial science.},
language = {en},
publisher = {Psychology Press},
author = {Cohen, Jacob},
year = {2009},
file = {Cohen - 2009 - Statistical power analysis for the behavioral scie.pdf:/Users/micl/Zotero/storage/YLULXAN9/Cohen - 2009 - Statistical power analysis for the behavioral scie.pdf:application/pdf},
}
@misc{fortuner_machine_2023,
title = {Machine {Learning} {Glossary}},
url = {https://ml-cheatsheet.readthedocs.io/en/latest/index.html},
urldate = {2024-01-12},
author = {Fortuner, Brendan},
year = {2023},
file = {Machine Learning Glossary — ML Glossary documentation:/Users/micl/Zotero/storage/HJ3MZAGU/index.html:text/html},
}
@misc{boykis_what_2023,
title = {What are embeddings?},
url = {http://vickiboykis.com/what_are_embeddings/index.html},
abstract = {A deep-dive into machine learning embeddings.},
language = {en},
urldate = {2024-01-12},
author = {Boykis, Vicki},
year = {2023},
file = {Snapshot:/Users/micl/Zotero/storage/9GRBNJMZ/what_are_embeddings.html:text/html},
}
@misc{stackexchange_are_2015,
type = {Forum post},
title = {Are there any differences between tensors and multidimensional arrays?},
url = {https://math.stackexchange.com/q/1134809},
urldate = {2024-01-16},
journal = {Mathematics Stack Exchange},
author = {{StackExchange}},
month = feb,
year = {2015},
file = {Snapshot:/Users/micl/Zotero/storage/VRE8CNB8/are-there-any-differences-between-tensors-and-multidimensional-arrays.html:text/html},
}
@misc{howard_practical_2024,
title = {Practical {Deep} {Learning} for {Coders}},
url = {https://course.fast.ai/},
abstract = {A free course designed for people with some coding experience, who want to learn how to apply deep learning and machine learning to practical problems.},
language = {en},
urldate = {2024-01-16},
journal = {Practical Deep Learning for Coders},
author = {Howard, Jeremy},
year = {2024},
file = {Snapshot:/Users/micl/Zotero/storage/YQCZDBMT/course.fast.ai.html:text/html},
}
@misc{heiss_marginalia_2022,
title = {Marginalia: {A} guide to figuring out what the heck marginal effects, marginal slopes, average marginal effects, marginal effects at the mean, and all these other marginal things are},
shorttitle = {Marginalia},
url = {https://www.andrewheiss.com/blog/2022/05/20/marginalia/#what-are-marginal-effects},
abstract = {Define what marginal effects even are, and then explore the subtle differences between average marginal effects, marginal effects at the mean, and marginal effects at representative values with the marginaleffects and emmeans R packages},
language = {en},
urldate = {2024-02-11},
journal = {Andrew Heiss},
author = {Heiss, Andrew},
year = {2022},
file = {Snapshot:/Users/micl/Zotero/storage/2RQHBN5I/marginalia.html:text/html},
}
@misc{arel-bundock_marginal_2024,
title = {Marginal {Effects} {Zoo}},
url = {https://marginaleffects.com/},
urldate = {2024-02-11},
author = {Arel-Bundock, Vincent},
year = {2024},
file = {Marginal Effects Zoo:/Users/micl/Zotero/storage/2W6CT52S/marginaleffects.com.html:text/html},
}
@misc{quantmetry_mapie_2024,
title = {{MAPIE} - {Model} {Agnostic} {Prediction} {Interval} {Estimator}},
note = {Documentation for version 0.8.2},
url = {https://mapie.readthedocs.io/en/latest/},
urldate = {2024-03-06},
author = {{Quantmetry}},
year = {2024},
file = {MAPIE - Model Agnostic Prediction Interval Estimator — MAPIE 0.8.2 documentation:/Users/micl/Zotero/storage/N4XWTVTF/latest.html:text/html},
}
@misc{hvitfeldt_feature_2024,
title = {Feature {Engineering} {A}-{Z}},
url = {https://feaz-book.com/},
language = {en},
urldate = {2024-03-10},
journal = {Feature Engineering A-Z},
author = {Hvitfeldt, Emil},
month = jan,
year = {2024},
file = {Snapshot:/Users/micl/Zotero/storage/IM3A6CL5/feaz-book.com.html:text/html},
}
@misc{google_classification_2024,
title = {Classification: {ROC} {Curve} and {AUC}},
shorttitle = {Classification},
url = {https://developers.google.com/machine-learning/crash-course/classification/roc-and-auc},
language = {en},
urldate = {2024-03-12},
journal = {Google for Developers},
author = {{Google}},
year = {2024},
file = {Snapshot:/Users/micl/Zotero/storage/2LNC83PH/roc-and-auc.html:text/html},
}
@misc{penn_state_54_2018,
title = {5.4 - {A} {Matrix} {Formulation} of the {Multiple} {Regression} {Model}},
url = {https://online.stat.psu.edu/stat462/node/132/},
urldate = {2024-03-12},
author = {{Penn State Department of Statistics}},
year = {2018},
note = {Source of img/matrix\_linreg.png},
file = {5.4 - A Matrix Formulation of the Multiple Regression Model | STAT 462:/Users/micl/Zotero/storage/8AKZVNBW/132.html:text/html},
}
@misc{chernozhukov_applied_2024,
title = {Applied {Causal} {Inference} {Powered} by {ML} and {AI}},
url = {http://arxiv.org/abs/2403.02467},
abstract = {An introduction to the emerging fusion of machine learning and causal inference. The book presents ideas from classical structural equation models (SEMs) and their modern AI equivalent, directed acyclical graphs (DAGs) and structural causal models (SCMs), and covers Double/Debiased Machine Learning methods to do inference in such models using modern predictive tools.},
urldate = {2024-03-12},
publisher = {arXiv},
author = {Chernozhukov, Victor and Hansen, Christian and Kallus, Nathan and Spindler, Martin and Syrgkanis, Vasilis},
month = mar,
year = {2024},
note = {arXiv:2403.02467 [cs, econ, stat]},
keywords = {Statistics - Methodology, Statistics - Machine Learning, Computer Science - Machine Learning, Economics - Econometrics},
file = {arXiv.org Snapshot:/Users/micl/Zotero/storage/5WLWIMVI/2403.html:text/html;Full Text PDF:/Users/micl/Zotero/storage/VI7X2QTU/Chernozhukov et al. - 2024 - Applied Causal Inference Powered by ML and AI.pdf:application/pdf},
}
@book{mckinney_python_2023,
edition = {3},
title = {Python for {Data} {Analysis}},
url = {https://wesmckinney.com/book/},
language = {en},
urldate = {2024-03-19},
author = {McKinney, Wes},
year = {2023},
file = {Snapshot:/Users/micl/Zotero/storage/GFE2YV2S/book.html:text/html},
}
@book{navarro_learning_2018,
title = {Learning {Statistics} with {R}},
url = {https://learningstatisticswithr.com},
language = {en},
author = {Navarro, Danielle},