% ms4k.tex — forked from alecristia/indif-metamega.
% (GitHub page furniture and line-number gutter removed so the file compiles.)
\documentclass[man]{apa6}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e} % provides \textsubscript
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\else % if luatex or xelatex
\ifxetex
\usepackage{mathspec}
\else
\usepackage{fontspec}
\fi
\defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
\fi
% use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
% use microtype if available
\IfFileExists{microtype.sty}{%
\usepackage{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
\usepackage{hyperref}
\hypersetup{unicode=true,
pdftitle={A comparison of meta-analysis, mega-analysis, and a hybrid approach},
pdfauthor={Ezequiel Koile, Sho Tsuji, \& Alejandrina Cristia},
pdfborder={0 0 0},
breaklinks=true}
\urlstyle{same} % don't use monospace font for urls
\usepackage{graphicx,grffile}
\makeatletter
% Cap \includegraphics dimensions at the line width / text height while
% keeping smaller natural sizes unchanged; consumed by the
% \setkeys{Gin}{...} defaults below.
% \newcommand* (not \def): errors on accidental redefinition (l2tabu).
\newcommand*{\maxwidth}{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
\newcommand*{\maxheight}{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
}
\setlength{\emergencystretch}{3em} % prevent overfull lines
% Pandoc emits \tightlist inside compact itemize/enumerate environments;
% \providecommand keeps any earlier definition and removes inter-item space.
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\setcounter{secnumdepth}{0}
% Redefines (sub)paragraphs to behave more like sections
% Make \paragraph and \subparagraph behave like free-standing headings:
% the trailing \mbox{} forces a break after the otherwise run-in title.
% Guarded by \ifx...\undefined so classes lacking these commands still work.
\ifx\paragraph\undefined\else
\let\oldparagraph\paragraph
\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
\fi
\ifx\subparagraph\undefined\else
\let\oldsubparagraph\subparagraph
\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
\fi
%%% Use protect on footnotes to avoid problems with footnotes in titles
% (\footnote is fragile in moving arguments; wrap it with \protect.)
\let\rmarkdownfootnote\footnote%
\def\footnote{\protect\rmarkdownfootnote}
\title{A comparison of meta-analysis, mega-analysis, and a hybrid approach}
\author{Ezequiel Koile\textsuperscript{a}, Sho Tsuji\textsuperscript{b}, \&
Alejandrina Cristia\textsuperscript{c}}
\date{}
\shorttitle{individual variation in infant speech processing}
\affiliation{
\vspace{0.5cm}
\textsuperscript{a} ADD\\\textsuperscript{b} ADD\\\textsuperscript{c} ADD}
\usepackage{csquotes}
\usepackage{upgreek}
\captionsetup{font=singlespacing,justification=justified}
\usepackage{longtable}
\usepackage{lscape}
\usepackage{multirow}
\usepackage{tabularx}
\usepackage[flushleft]{threeparttable}
\usepackage{threeparttablex}
% Landscape, centered wrapper for wide longtable/threeparttable tables
% (papaja helper; ThreePartTable comes from threeparttablex).
\newenvironment{lltable}{\begin{landscape}\begin{center}\begin{ThreePartTable}}{\end{ThreePartTable}\end{center}\end{landscape}}
\makeatletter
\newcommand\LastLTentrywidth{1em}
\newlength\longtablewidth
\setlength{\longtablewidth}{1in}
% Recover the natural width of the most recent longtable by replaying its
% recorded column widths (the LT@<roman numeral> macro); accumulates the
% total in \longtablewidth and the last column's width in \LastLTentrywidth.
\newcommand{\getlongtablewidth}{\begingroup \ifcsname LT@\roman{LT@tables}\endcsname \global\longtablewidth=0pt \renewcommand{\LT@entry}[2]{\global\advance\longtablewidth by ##2\relax\gdef\LastLTentrywidth{##2}}\@nameuse{LT@\roman{LT@tables}} \fi \endcgroup}% NOTE placeholder removed below
\makeatother
% Let endfloat treat these table-like environments as delayed "table" floats.
\DeclareDelayedFloatFlavor{ThreePartTable}{table}
\DeclareDelayedFloatFlavor{lltable}{table}
\DeclareDelayedFloatFlavor*{longtable}{table}
\makeatletter
% NOTE(review): papaja tweak — write endfloat material immediately rather
% than at \end{document}; confirm against the papaja class template.
\renewcommand{\efloat@iwrite}[1]{\immediate\expandafter\protected@write\csname efloat@post#1\endcsname{}}
\makeatother
\authornote{
Correspondence concerning this article should be addressed to Ezequiel
Koile, ADD. E-mail: ADD}
\abstract{
Laboratory measures of infant speech perception have been central to the
development of theories of infant language acquisition, and could be
valuable predictors of important individual and group variation. A
recent report suggests that these measures' psychometric properties may
be limited, based on a meta-analytic analysis. We re-analyze those data
using a mega-analytic approach, as well as a variety of hybrid
approaches. We find that (a) the results of meta- and mega-analyses
diverge significantly, and (b) a mega-analytic approach can be more
powerful in detecting stability in performance across days. However,
since it is often difficult to recover original data, we also explore a
hybrid approach, in which some studies are represented by group
statistics, and others by the original data, assessing to what extent
biased data sharing may impact overall conclusions.
}
\begin{document}
\maketitle
Recent years have seen the rise of cumulative science, in which each new
result is integrated into the web of prior knowledge. In this paper, we
introduce mega-analyses, a cumulative science method that is rare in the
study of cognition. Mega-analyses involve integrated analyses of raw
data collected in multiple sites using a single pre-processing and
statistical analysis pipeline. They thus differ from simple analyses in
the scope of the data, dealing with more heterogeneous sets since the
sites may not have collected data in a coordinated manner; and from
meta-analyses in that the raw data are used as input, rather than
group-based statistics. We couch this presentation in the context of a
case study in order to facilitate a discussion of the contexts in which
a meta- or a mega-analysis is the more appropriate tool for cumulative
science.
\subsection{Study case: Reliability of infant speech perception
measures}\label{study-case-reliability-of-infant-speech-perception-measures}
Infant speech perception measures have been central to the development
of theories of language acquisition. For example, experimental measures
showing that infants' perception for non-native contrasts varied between
6 and 12 months of age led to the conclusion that phonological
acquisition begins as early as this (Werker \& Tees, 1984). More
recently, these same measures have been argued to be valuable predictors
of meaningful individual and group variation. Cristia, Seidl, Junge,
Soderstrom, and Hagoort (2014) meta-analyzed 20 articles and theses
reporting correlations between speech perception measures (including,
for example, perception of non-native contrasts) and vocabulary; as well
as work comparing performance in such tasks by infants at risk of a
language disorder against infants not at risk. The authors concluded
that individual and group variation was significantly associated with
performance in infant speech perception tasks (median r=.31, 95\% CI
{[}.22, .40{]}), in line with the hypothesis that infant speech
perception measures can provide an insight into individual infants'
language skills.
One outstanding issue, however, concerns the psychometric properties of
such measures, and in particular their reliability. We will call the
correlation between two versions of a given measure (such as a
test-retest correlation) its \emph{reliability}; and the correlation
between that measure and a measure of something else (a potential
predictor or predicted variable) its \emph{validity}. Demonstrations
within classical test theory suggest that the validity of any measure is
bounded by the square root of its reliability (e.g., Michell, 2003).
Only two studies have been published reporting on test-retest
correlations of infants undergoing the same speech perception measures
twice (Cristia, Seidl, Singh, \& Houston, 2016; Houston, Horn, Qi, Ting,
\& Gao, 2007). Since the more recent paper includes the data from the
first, we only discuss the more recent one here. Cristia et al. (2016)
used a meta-analytic method
to combine the earlier results with test-retest data collected
independently by three research labs (each of which carried out 3-5
studies), which did not know the others were also gathering the same
kind of data. Meta-analytic methods seem to conceptually fit well the
goal of integrating results in such a setting. Thus, the authors first
estimated the test-retest correlation for each of the 12 studies (13
with Houston et al., 2007), and then derived the overall estimate as the
weighted median correlation. Surprisingly, this revealed an overall
r=0.065, with a 95\% confidence interval of {[}-0.12; 0.25{]}. This
result was not just due to some of the studies providing very small
correlation coefficients, but crucially because some of these
coefficients were negative.
If these results are to be believed, this means that correlation work
reporting on these measures' validity (e.g., correlations with
vocabulary estimates, group differences) is suspect, because the
measures' null reliability would entail that no validity can be
measured. Cristia et al. (2016) made the case that it was appropriate to
integrate across all 13 studies because there was no reason to believe
that test-retest would yield negative correlations. While this is true,
calculating correlations as a measure of test-retest stability within
each study and then averaging them is not equivalent to calculating
test-retest stability in behavior taking all studies into account
together.
In fact, genetics and neuroimaging research are seeing the emergence of
work that discusses the benefits of considering raw data together in
what are called mega-analyses {[}e.g., CITATIONS HERE{]}. Mega-analyses
are generally preferable to meta-analyses because pre-processing
steps can be done in a homogeneous fashion, removing this potential
source of variance. As it happens, Cristia et al. (2016) pre-processed
all data (except for the published study) in the same way, and thus this
was not a consideration. Additionally, a second and crucial advantage of
mega- over meta-analyses is that structured sources of variance can be
better accounted for, and analyses therefore have more power to detect
small and stable effects.
\subsection{The present study}\label{the-present-study}
We reanalyze Cristia et al. (2016)'s data to revisit the question of how
reliable infant speech perception tasks are using a mega-analytic
approach. We address this question first assuming that the analyzer has
access to all data, which is the case here. This answer is most
informative for readers that are specifically interested in the question
of reliability. We then make a few reasonable assumptions regarding data
missingness. If starting a mega-analysis from scratch, the cumulative
scientist may only have access to a subset of the raw data, with the
remaining studies in the literature being represented by group-based
summary statistics at best. Althow raw data may be missing at chance, we
also contemplate three cases of biased missingness that would be due to
selective reporting. First, we assume data would be missing for studies
with a small number of participants; second for studies with small main
effects; and third for those with small or negative test-retest
correlations (assuming that the original researchers finding themselves
in one of these situations may be less prone to make the additional
effort of sharing the raw data).
\subsection{Methods}\label{methods}
Very short because we refer to previous paper for full description of
experiments
table of experiments: short names, short description, N of children,
mean age
We got data from osf, using R, this paper uses Rmd in RStudio \& papaja
for increased reproducibility.
\subsection{Results}\label{results}
\begin{itemize}
\tightlist
\item
how should structure be accounted for - are studies all different from
each other?
\end{itemize}
explain use of AIC to compare models, using also conceptual reasons to
group studies -- ending up with 5 clusters
\begin{itemize}
\tightlist
\item
in mega-analysis, do you also find basically no prediction of test2
from test1?
\end{itemize}
no, we get something pretty different. Explain why
\begin{itemize}
\item
what happens if you only have some data from some studies -- picked at
random? (assuming original authors do not withhold the data for any
reason that is related to the data itself)
\item
and if you only have data from large studies? (authors who ran more
babies are more motivated to share)
\item
and if you only have data from studies with large main effects?
(defined as the average between effect at test1 and effect at test2 --
intuition is that authors with strong effects believe their data more)
\item
and if you only have data from studies with large test-retest
correlations? (idea: authors who find reliability more likely to share
raw data)
\item
use a graph to represent all of the hybrid results (max 4k words!)
\end{itemize}
\subsection{Discussion}\label{discussion}
\begin{itemize}
\tightlist
\item
under what conditions can we trust infant speech perception measures
of individual variation?
\item
we recommend mega- over meta-analysis
\item
explain under what conditions this holds, and when mega-analysis
provides biased view of data
\end{itemize}
\subsection{References}\label{references}
\setlength{\parindent}{-0.5in} \setlength{\leftskip}{0.5in}
\hypertarget{refs}{}
\hypertarget{ref-cristia2014predicting}{}
Cristia, A., Seidl, A., Junge, C., Soderstrom, M., \& Hagoort, P.
(2014). Predicting individual variation in language from infant speech
perception measures. \emph{Child Development}, \emph{85}(4), 1330--1345.
\hypertarget{ref-cristia2016test}{}
Cristia, A., Seidl, A., Singh, L., \& Houston, D. (2016). Test-retest
reliability in infant speech perception tasks. \emph{Infancy},
\emph{21}(5), 648--667. Retrieved from \url{https://osf.io/62nrk/}
\hypertarget{ref-houston2007assessing}{}
Houston, D., Horn, D. L., Qi, R., Ting, J. Y., \& Gao, S. (2007).
Assessing speech discrimination in individual infants. \emph{Infancy},
\emph{12}, 119--145.
\hypertarget{ref-michell2003measurement}{}
Michell, J. (2003). Measurement: A beginner's guide. \emph{Journal of
Applied Measurement}, \emph{4}(4), 298--308.
\hypertarget{ref-werker1984cross}{}
Werker, J. F., \& Tees, R. (1984). Cross-language speech perception.
\emph{Infant Behavior and Development}, \emph{7}, 49--63.
\end{document}