Ability to tokenize words.
convert_tokens(
x,
path = FALSE,
split_pdf = FALSE,
remove_hyphen = TRUE,
token_function = NULL
)
The text of the pdf file. This can be specified directly, or the pdftools package can be used to read the pdf file from a file path. To use pdftools, the path argument must be set to TRUE.
TRUE/FALSE indicating whether x is a path to the location of the pdf to be converted to text, rather than the text itself. Default is FALSE. When TRUE, the pdftools package is used for this conversion.
TRUE/FALSE indicating whether to split the pdf using white space. This would be most useful with multicolumn pdf files. The split_pdf function attempts to recreate the column layout of the text into a single column starting with the left column and proceeding to the right.
TRUE/FALSE indicating whether hyphenated words should be adjusted to combine onto a single line. Default is TRUE.
The tokenizing function to use, taken from the tokenizers package. When left as NULL (the default), the tokenize_words function is used.
A list of character vectors containing the tokens. More detail can be found in the documentation of the tokenizers package.
file <- system.file('pdf', '1610.00147.pdf', package = 'pdfsearch')
convert_tokens(file, path = TRUE)
#> [[1]]
#> [[1]][[1]]
#> [1] "data" "fusion" "for" "correcting"
#> [5] "measurement" "errors" "tracy" "schifeling"
#> [9] "jerome" "p" "reiter" "maria"
#> [13] "deyoreo" "arxiv" "1610.00147v1" "stat.me"
#> [17] "1" "oct" "2016" "abstract"
#> [21] "often" "in" "surveys" "key"
#> [25] "items" "are" "subject" "to"
#> [29] "measurement" "errors" "given" "just"
#> [33] "the" "data" "it" "can"
#> [37] "be" "difficult" "to" "determine"
#> [41] "the" "distribution" "of" "this"
#> [45] "error" "process" "and" "hence"
#> [49] "to" "obtain" "accurate" "inferences"
#> [53] "that" "involve" "the" "error"
#> [57] "prone" "variables" "in" "some"
#> [61] "settings" "however" "analysts" "have"
#> [65] "access" "to" "a" "data"
#> [69] "source" "on" "different" "in"
#> [73] "dividuals" "with" "high" "quality"
#> [77] "measurements" "of" "the" "error"
#> [81] "prone" "survey" "items" "we"
#> [85] "present" "a" "data" "fusion"
#> [89] "framework" "for" "leveraging" "this"
#> [93] "information" "to" "improve" "infer"
#> [97] "ences" "in" "the" "error"
#> [101] "prone" "survey" "the" "basic"
#> [105] "idea" "is" "to" "posit"
#> [109] "models" "about" "the" "rates"
#> [113] "at" "which" "individuals" "make"
#> [117] "errors" "coupled" "with" "models"
#> [121] "for" "the" "values" "reported"
#> [125] "when" "errors" "are" "made"
#> [129] "this" "can" "avoid" "the"
#> [133] "unrealistic" "assumption" "of" "conditional"
#> [137] "independence" "typically" "used" "in"
#> [141] "data" "fusion" "we" "apply"
#> [145] "the" "approach" "on" "the"
#> [149] "re" "ported" "values" "of"
#> [153] "educational" "attainments" "in" "the"
#> [157] "american" "community" "survey" "using"
#> [161] "the" "national" "survey" "of"
#> [165] "college" "graduates" "as" "the"
#> [169] "high" "quality" "data" "source"
#> [173] "in" "doing" "so" "we"
#> [177] "account" "for" "the" "informative"
#> [181] "sampling" "design" "used" "to"
#> [185] "select" "the" "national" "survey"
#> [189] "of" "college" "graduates" "we"
#> [193] "also" "present" "a" "process"
#> [197] "for" "assessing" "the" "sensitivity"
#> [201] "of" "various" "analyses" "to"
#> [205] "different" "choices" "for" "the"
#> [209] "measurement" "error" "models" "supplemental"
#> [213] "material" "is" "available" "online"
#> [217] "key" "words" "fusion" "imputation"
#> [221] "measurement" "error" "missing" "survey"
#> [225] "this" "research" "was" "supported"
#> [229] "by" "the" "national" "science"
#> [233] "foundation" "under" "award" "ses"
#> [237] "11" "31897" "the" "authors"
#> [241] "wish" "to" "thank" "seth"
#> [245] "sanders" "for" "his" "input"
#> [249] "on" "informative" "prior" "specifications"
#> [253] "and" "mauricio" "sadinle" "for"
#> [257] "discussion" "that" "improved" "the"
#> [261] "strategy" "for" "accounting" "for"
#> [265] "the" "informative" "sample" "design"
#> [269] "1"
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] "1" "introduction" "survey" "data"
#> [5] "often" "contain" "items" "that"
#> [9] "are" "subject" "to" "measurement"
#> [13] "errors" "for" "example" "some"
#> [17] "respondents" "might" "misunderstand" "a"
#> [21] "question" "or" "accidentally" "select"
#> [25] "the" "wrong" "response" "thereby"
#> [29] "providing" "values" "unequal" "to"
#> [33] "their" "factual" "values" "left"
#> [37] "uncorrected" "these" "measurement" "errors"
#> [41] "can" "result" "in" "degraded"
#> [45] "inferences" "kim" "et" "al"
#> [49] "2015" "unfor" "tunately" "the"
#> [53] "distribution" "of" "the" "measurement"
#> [57] "errors" "typically" "is" "not"
#> [61] "estimable" "from" "the" "survey"
#> [65] "data" "alone" "one" "either"
#> [69] "needs" "to" "make" "strong"
#> [73] "assumptions" "about" "the" "measure"
#> [77] "ment" "error" "process" "e.g"
#> [81] "as" "in" "curran" "and"
#> [85] "hussong" "2009" "or" "leverage"
#> [89] "information" "from" "some" "other"
#> [93] "source" "of" "data" "as"
#> [97] "we" "do" "here" "one"
#> [101] "natural" "source" "of" "information"
#> [105] "is" "a" "validation" "sample"
#> [109] "i.e" "a" "dataset" "with"
#> [113] "both" "the" "reported" "possibly"
#> [117] "erroneous" "values" "and" "the"
#> [121] "true" "values" "measured" "on"
#> [125] "the" "same" "individuals" "these"
#> [129] "individuals" "could" "be" "a"
#> [133] "subset" "of" "the" "original"
#> [137] "survey" "pepe" "1992" "yucel"
#> [141] "and" "zaslavsky" "2005" "or"
#> [145] "a" "completely" "distinct" "set"
#> [149] "raghunathan" "2006" "schenker" "and"
#> [153] "raghunathan" "2007" "schenker" "et"
#> [157] "al" "2010" "carrig" "et"
#> [161] "al" "2015" "with" "validation"
#> [165] "data" "one" "can" "model"
#> [169] "the" "relationship" "between" "the"
#> [173] "error" "prone" "and" "true"
#> [177] "values" "and" "use" "the"
#> [181] "model" "to" "replace" "the"
#> [185] "error" "prone" "items" "with"
#> [189] "multiply" "imputed" "plausible" "true"
#> [193] "values" "reiter" "2008" "siddique"
#> [197] "et" "al" "2015" "in"
#> [201] "many" "settings" "however" "it"
#> [205] "is" "not" "possible" "to"
#> [209] "obtain" "validation" "samples" "e.g"
#> [213] "because" "it" "is" "too"
#> [217] "expensive" "or" "because" "someone"
#> [221] "other" "than" "the" "analyst"
#> [225] "collected" "the" "data" "in"
#> [229] "such" "cases" "another" "potential"
#> [233] "source" "of" "information" "is"
#> [237] "a" "separate" "gold" "stan"
#> [241] "dard" "dataset" "that" "includes"
#> [245] "true" "or" "at" "least"
#> [249] "very" "high" "quality" "measurements"
#> [253] "of" "the" "items" "subject"
#> [257] "to" "error" "but" "not"
#> [261] "the" "error" "prone" "measurements"
#> [265] "unlike" "validation" "sam" "ples"
#> [269] "the" "gold" "standard" "dataset"
#> [273] "alone" "does" "not" "provide"
#> [277] "enough" "information" "to" "estimate"
#> [281] "the" "relationship" "between" "the"
#> [285] "error" "prone" "and" "true"
#> [289] "values" "it" "only" "provides"
#> [293] "information" "about" "the" "distribution"
#> [297] "of" "the" "true" "values"
#> [301] "thus" "analysts" "are" "faced"
#> [305] "with" "a" "special" "case"
#> [309] "2"
#>
#>
#> [[3]]
#> [[3]][[1]]
#> [1] "of" "data" "fusion" "rubin"
#> [5] "1986" "moriarity" "and" "scheuren"
#> [9] "2001" "rassler" "2002" "d’orazio"
#> [13] "et" "al" "2006" "reiter"
#> [17] "2012" "fosdick" "et" "al"
#> [21] "2016" "i.e" "integrating" "information"
#> [25] "from" "two" "databases" "with"
#> [29] "disjoint" "sets" "of" "individuals"
#> [33] "and" "distinct" "variables" "one"
#> [37] "default" "approach" "common" "in"
#> [41] "other" "data" "fusion" "contexts"
#> [45] "is" "to" "assume" "that"
#> [49] "the" "error" "prone" "and"
#> [53] "true" "values" "are" "conditionally"
#> [57] "independent" "given" "some" "set"
#> [61] "of" "variables" "x" "common"
#> [65] "to" "both" "the" "survey"
#> [69] "and" "gold" "standard" "data"
#> [73] "effectively" "this" "involves" "using"
#> [77] "the" "gold" "standard" "data"
#> [81] "to" "estimate" "a" "predictive"
#> [85] "model" "for" "the" "true"
#> [89] "values" "from" "x" "and"
#> [93] "applying" "the" "estimated" "model"
#> [97] "to" "impute" "replacements" "for"
#> [101] "all" "values" "of" "the"
#> [105] "error" "prone" "items" "in"
#> [109] "the" "survey" "however" "this"
#> [113] "conditional" "independence" "assumption" "completely"
#> [117] "disregards" "the" "information" "in"
#> [121] "the" "error" "prone" "values"
#> [125] "which" "sacrifices" "potentially" "useful"
#> [129] "information" "for" "example" "consider"
#> [133] "national" "surveys" "that" "ask"
#> [137] "people" "to" "report" "their"
#> [141] "educational" "attainment" "we" "might"
#> [145] "expect" "most" "people" "to"
#> [149] "report" "values" "accurately" "and"
#> [153] "only" "a" "modest" "fraction"
#> [157] "to" "make" "errors" "it"
#> [161] "does" "not" "make" "sense"
#> [165] "to" "alter" "every" "individual’s"
#> [169] "reported" "values" "in" "the"
#> [173] "survey" "as" "would" "be"
#> [177] "done" "using" "a" "conditional"
#> [181] "independence" "approach" "in" "this"
#> [185] "article" "we" "develop" "a"
#> [189] "framework" "for" "leveraging" "information"
#> [193] "from" "gold" "stan" "dard"
#> [197] "data" "to" "improve" "inferences"
#> [201] "in" "surveys" "subject" "to"
#> [205] "measurement" "errors" "the" "basic"
#> [209] "idea" "is" "to" "encode"
#> [213] "plausible" "assumptions" "about" "the"
#> [217] "error" "process" "e.g" "most"
#> [221] "people" "do" "not" "make"
#> [225] "errors" "when" "reporting" "educational"
#> [229] "attainments" "and" "the" "reporting"
#> [233] "process" "e.g" "when" "people"
#> [237] "make" "errors" "they" "are"
#> [241] "more" "likely" "to" "report"
#> [245] "higher" "attainments" "than" "actual"
#> [249] "into" "statistical" "models" "we"
#> [253] "couple" "those" "models" "with"
#> [257] "distributions" "for" "the" "un"
#> [261] "derlying" "true" "data" "values"
#> [265] "and" "use" "multiple" "imputation"
#> [269] "to" "create" "plausible" "corrections"
#> [273] "to" "the" "error" "prone"
#> [277] "survey" "values" "which" "then"
#> [281] "can" "be" "analyzed" "using"
#> [285] "the" "methods" "from" "ru"
#> [289] "bin" "1987" "this" "allows"
#> [293] "us" "to" "avoid" "unrealistic"
#> [297] "conditional" "independence" "assumptions" "in"
#> [301] "lieu" "of" "more" "scientifically"
#> [305] "defensible" "models" "the" "remainder"
#> [309] "of" "this" "article" "is"
#> [313] "organized" "as" "follows" "in"
#> [317] "section" "2" "we" "review"
#> [321] "an" "3"
#>
#>
#> [[4]]
#> [[4]][[1]]
#> [1] "example" "of" "misreporting" "of"
#> [5] "educational" "attainment" "in" "data"
#> [9] "collected" "by" "the" "census"
#> [13] "bureau" "so" "as" "to"
#> [17] "motivate" "the" "methodological" "developments"
#> [21] "in" "section" "3" "we"
#> [25] "intro" "duce" "the" "general"
#> [29] "framework" "for" "specifying" "measurement"
#> [33] "error" "models" "to" "leverage"
#> [37] "the" "information" "in" "gold"
#> [41] "standard" "data" "in" "section"
#> [45] "4" "we" "apply" "the"
#> [49] "framework" "to" "handle" "po"
#> [53] "tential" "measurement" "error" "in"
#> [57] "educational" "attainment" "in" "the"
#> [61] "2010" "american" "community" "survey"
#> [65] "acs" "using" "the" "2010"
#> [69] "national" "survey" "of" "college"
#> [73] "graduates" "nscg" "as" "a"
#> [77] "gold" "standard" "file" "in"
#> [81] "doing" "so" "we" "deal"
#> [85] "with" "a" "key" "complication"
#> [89] "in" "the" "data" "integration"
#> [93] "accounting" "for" "the" "informative"
#> [97] "sampling" "design" "used" "to"
#> [101] "sample" "the" "nscg" "we"
#> [105] "also" "demonstrate" "how" "the"
#> [109] "framework" "facilitates" "analysis" "of"
#> [113] "the" "sensitivity" "of" "conclusions"
#> [117] "to" "different" "measurement" "error"
#> [121] "model" "specifications" "in" "section"
#> [125] "5" "we" "provide" "a"
#> [129] "brief" "summary" "2" "misreporting"
#> [133] "in" "educational" "attainment" "to"
#> [137] "illustrate" "the" "potential" "for"
#> [141] "reporting" "errors" "in" "educational"
#> [145] "attainment" "that" "can" "arise"
#> [149] "in" "surveys" "we" "examine"
#> [153] "data" "from" "the" "1993"
#> [157] "nscg" "the" "1993" "nscg"
#> [161] "surveyed" "individuals" "who" "indicated"
#> [165] "on" "the" "1990" "census"
#> [169] "long" "form" "that" "they"
#> [173] "had" "at" "least" "a"
#> [177] "college" "degree" "fesco" "et"
#> [181] "al" "2012" "the" "questionnaire"
#> [185] "asked" "about" "educational" "attainment"
#> [189] "including" "detailed" "questions" "about"
#> [193] "educational" "histories" "these" "questions"
#> [197] "greatly" "reduce" "the" "possibility"
#> [201] "of" "respondent" "error" "so"
#> [205] "that" "the" "educational" "attainment"
#> [209] "values" "in" "the" "nscg"
#> [213] "can" "be" "considered" "a"
#> [217] "gold" "standard" "black" "et"
#> [221] "al" "2003" "the" "census"
#> [225] "long" "form" "in" "contrast"
#> [229] "did" "not" "include" "detailed"
#> [233] "follow" "up" "questions" "so"
#> [237] "that" "reported" "educational" "attainment"
#> [241] "is" "prone" "to" "measurement"
#> [245] "error" "the" "census" "bureau"
#> [249] "linked" "each" "individual" "in"
#> [253] "the" "nscg" "to" "their"
#> [257] "corresponding" "record" "in" "the"
#> [261] "long" "form" "data" "the"
#> [265] "linked" "file" "is" "available"
#> [269] "for" "download" "from" "the"
#> [273] "inter" "university" "consortium" "for"
#> [277] "political" "and" "social" "research"
#> [281] "national" "science" "foundation" "4"
#>
#>
#> [[5]]
#> [[5]][[1]]
#> [1] "table" "1" "unweighted" "cross" "tabulation"
#> [6] "of" "reported" "education" "in" "the"
#> [11] "nscg" "and" "census" "long" "form"
#> [16] "from" "the" "linked" "dataset" "ba"
#> [21] "stands" "for" "bachelor’s" "degree" "ma"
#> [26] "stands" "for" "master’s" "degree" "prof"
#> [31] "stands" "for" "professional" "degree" "and"
#> [36] "phd" "stands" "for" "ph" "d"
#> [41] "degree" "the" "14,319" "individuals" "in"
#> [46] "the" "group" "labeled" "no" "degree"
#> [51] "did" "not" "have" "a" "college"
#> [56] "degree" "despite" "reporting" "otherwise" "the"
#> [61] "51,396" "individuals" "in" "the" "group"
#> [66] "labeled" "other" "did" "not" "have"
#> [71] "one" "of" "ba" "ma" "prof"
#> [76] "phd" "and" "are" "discarded" "from"
#> [81] "subsequent" "analyses" "census" "reported" "education"
#> [86] "z" "ba" "ma" "prof" "phd"
#> [91] "total" "ba" "89580" "4109" "1241"
#> [96] "249" "95179" "nscg" "ma" "1218"
#> [101] "33928" "655" "526" "36327" "reported"
#> [106] "prof" "382" "359" "8648" "563"
#> [111] "9952" "education" "phd" "99" "193"
#> [116] "452" "6726" "7470" "total" "91279"
#> [121] "38589" "10996" "8064" "148928" "no"
#> [126] "degree" "10150" "1792" "2040" "337"
#> [131] "14319" "other" "33368" "10912" "4710"
#> [136] "2406" "51396" "1993" "because" "of"
#> [141] "the" "linkages" "we" "can" "characterize"
#> [146] "the" "actual" "measurement" "error" "mechanism"
#> [151] "for" "educational" "attainment" "in" "the"
#> [156] "1990" "long" "form" "data" "in"
#> [161] "the" "nscg" "we" "treat" "the"
#> [166] "highest" "degree" "of" "the" "three"
#> [171] "most" "recent" "degrees" "reported" "coded"
#> [176] "as" "ed6c1" "ed6c2" "and" "ed6c3"
#> [181] "in" "the" "file" "as" "the"
#> [186] "true" "education" "level" "we" "disregard"
#> [191] "any" "degrees" "earned" "in" "the"
#> [196] "years" "1990" "1993" "as" "these"
#> [201] "occur" "in" "the" "three" "year"
#> [206] "gap" "between" "collection" "of" "the"
#> [211] "long" "form" "and" "nscg" "data"
#> [216] "this" "ensures" "consistent" "time" "frames"
#> [221] "for" "the" "nscg" "and" "long"
#> [226] "form" "reported" "values" "we" "cross"
#> [231] "tabulate" "these" "degrees" "with" "the"
#> [236] "degrees" "reported" "in" "the" "long"
#> [241] "form" "data" "coded" "yearsch" "in"
#> [246] "the" "file" "table" "1" "displays"
#> [251] "the" "cross" "tabulation" "a" "similar"
#> [256] "analysis" "was" "done" "by" "black"
#> [261] "et" "al" "2003" "as" "evident"
#> [266] "in" "table" "1" "reported" "education"
#> [271] "levels" "on" "the" "long" "form"
#> [276] "often" "are" "higher" "than" "those"
#> [281] "on" "the" "nscg" "particularly" "for"
#> [286] "individuals" "with" "only" "a" "bachelor’s"
#> [291] "degree" "of" "the" "163,247" "individuals"
#> [296] "in" "scope" "in" "the" "nscg"
#> [301] "over" "14,000" "were" "determined" "not"
#> [306] "to" "have" "at" "least" "a"
#> [311] "bachelor’s" "degree" "when" "asked" "in"
#> [316] "the" "nscg" "despite" "reporting" "otherwise"
#> [321] "5"
#>
#>
#> [[6]]
#> [[6]][[1]]
#> [1] "in" "the" "long" "form"
#> [5] "a" "whopping" "33" "of"
#> [9] "individuals" "who" "reported" "being"
#> [13] "professionals" "in" "the" "long"
#> [17] "form" "actually" "are" "not"
#> [21] "professionals" "according" "to" "the"
#> [25] "nscg" "one" "possible" "explanation"
#> [29] "for" "this" "error" "is"
#> [33] "confusion" "over" "the" "definition"
#> [37] "of" "professionals" "the" "census"
#> [41] "bureau" "intended" "the" "category"
#> [45] "to" "capture" "graduate" "degrees"
#> [49] "from" "universities" "e.g" "j.d"
#> [53] "m.b.a" "m.d" "whereas" "black"
#> [57] "et" "al" "2003" "found"
#> [61] "that" "individuals" "in" "professions"
#> [65] "such" "as" "cosmetology" "nursing"
#> [69] "and" "health" "services" "which"
#> [73] "require" "certifications" "but" "not"
#> [77] "graduate" "degrees" "selected" "the"
#> [81] "category" "in" "spite" "of"
#> [85] "the" "nontrivial" "reporting" "error"
#> [89] "the" "overwhelming" "majority" "of"
#> [93] "individuals" "reported" "education" "levels"
#> [97] "are" "consistent" "in" "the"
#> [101] "long" "form" "and" "in"
#> [105] "the" "nscg" "of" "the"
#> [109] "individuals" "in" "the" "nscg"
#> [113] "who" "had" "at" "least"
#> [117] "a" "college" "degree" "at"
#> [121] "the" "time" "of" "the"
#> [125] "1990" "census" "about" "93.3"
#> [129] "of" "them" "have" "the"
#> [133] "same" "contemporaneous" "education" "levels"
#> [137] "in" "both" "files" "this"
#> [141] "suggests" "that" "most" "people"
#> [145] "report" "correctly" "an" "observation"
#> [149] "we" "want" "to" "leverage"
#> [153] "when" "constructing" "measurement" "error"
#> [157] "models" "for" "education" "in"
#> [161] "the" "2010" "acs" "in"
#> [165] "most" "situations" "we" "do"
#> [169] "not" "have" "the" "good"
#> [173] "fortune" "of" "observing" "individuals"
#> [177] "error" "prone" "and" "true"
#> [181] "values" "simultaneously" "instead" "we"
#> [185] "are" "in" "the" "setting"
#> [189] "represented" "by" "figure" "1"
#> [193] "this" "is" "also" "the"
#> [197] "case" "in" "our" "analysis"
#> [201] "of" "educational" "attainments" "in"
#> [205] "the" "2010" "acs" "described"
#> [209] "in" "section" "4" "the"
#> [213] "sampling" "frame" "for" "the"
#> [217] "2010" "nscg" "is" "constructed"
#> [221] "from" "reported" "education" "levels"
#> [225] "in" "the" "acs" "which"
#> [229] "replaced" "the" "long" "form"
#> [233] "after" "the" "2000" "census"
#> [237] "however" "unlike" "in" "1993"
#> [241] "linked" "data" "are" "not"
#> [245] "available" "as" "public" "use"
#> [249] "files" "therefore" "we" "treat"
#> [253] "the" "2010" "nscg" "as"
#> [257] "gold" "standard" "data" "and"
#> [261] "posit" "measurement" "models" "that"
#> [265] "connect" "the" "information" "from"
#> [269] "the" "two" "data" "sources"
#> [273] "using" "the" "framework" "that"
#> [277] "we" "now" "describe" "6"
#>
#>
#> [[7]]
#> [[7]][[1]]
#> [1] "x" "y" "z" "de"
#> [5] "x" "x" "dg" "x"
#> [9] "x" "figure" "1" "graphical"
#> [13] "representation" "of" "data" "fusion"
#> [17] "set" "up" "in" "the"
#> [21] "survey" "data" "de" "we"
#> [25] "only" "observe" "the" "error"
#> [29] "prone" "measurement" "z" "but"
#> [33] "not" "the" "true" "value"
#> [37] "y" "in" "the" "gold"
#> [41] "standard" "data" "dg" "we"
#> [45] "only" "observe" "y" "but"
#> [49] "not" "z" "we" "observe"
#> [53] "variables" "x" "in" "both"
#> [57] "samples" "3" "measurement" "error"
#> [61] "modeling" "via" "data" "fusion"
#> [65] "as" "in" "figure" "1"
#> [69] "let" "de" "and" "dg"
#> [73] "be" "two" "data" "sources"
#> [77] "comprising" "distinct" "individuals" "with"
#> [81] "sample" "sizes" "ne" "and"
#> [85] "ng" "respectively" "for" "each"
#> [89] "individual" "i" "in" "dg"
#> [93] "or" "de" "let" "xi"
#> [97] "xi1" "xip" "be" "variables"
#> [101] "common" "to" "both" "surveys"
#> [105] "such" "as" "demographic" "variables"
#> [109] "we" "assume" "these" "variables"
#> [113] "have" "been" "harmonized" "d’orazio"
#> [117] "et" "al" "2006" "across"
#> [121] "dg" "and" "de" "and"
#> [125] "are" "free" "of" "errors"
#> [129] "let" "y" "represent" "the"
#> [133] "error" "free" "values" "of"
#> [137] "some" "variable" "of" "interest"
#> [141] "and" "let" "z" "be"
#> [145] "an" "error" "prone" "version"
#> [149] "of" "y" "we" "observe"
#> [153] "z" "but" "not" "y"
#> [157] "for" "the" "ne" "individuals"
#> [161] "in" "de" "we" "observe"
#> [165] "y" "but" "not" "z"
#> [169] "for" "the" "ng" "individuals"
#> [173] "in" "dg" "for" "simplicity"
#> [177] "of" "notation" "we" "assume"
#> [181] "no" "missing" "values" "in"
#> [185] "any" "variable" "although" "the"
#> [189] "multiple" "imputation" "framework" "easily"
#> [193] "handles" "missing" "values" "additionally"
#> [197] "de" "can" "include" "variables"
#> [201] "for" "which" "there" "is"
#> [205] "no" "corresponding" "variable" "in"
#> [209] "dg" "these" "variables" "do"
#> [213] "not" "play" "a" "role"
#> [217] "in" "the" "measurement" "error"
#> [221] "modeling" "although" "they" "can"
#> [225] "be" "used" "in" "multiple"
#> [229] "imputation" "inferences" "we" "seek"
#> [233] "to" "estimate" "pr" "y"
#> [237] "z" "x" "and" "use"
#> [241] "it" "to" "create" "multiple"
#> [245] "imputations" "for" "the" "missing"
#> [249] "values" "in" "y" "for"
#> [253] "the" "individuals" "in" "de"
#> [257] "we" "do" "so" "for"
#> [261] "the" "common" "setting" "where"
#> [265] "x" "y" "z" "are"
#> [269] "all" "categorical" "variables" "similar"
#> [273] "ideas" "apply" "for" "other"
#> [277] "data" "types" "for" "j"
#> [281] "1" "p" "let" "each"
#> [285] "xj" "have" "dj" "levels"
#> [289] "let" "z" "have" "dz"
#> [293] "levels" "and" "y" "have"
#> [297] "dy" "7"
#>
#>
#> [[8]]
#> [[8]][[1]]
#> [1] "levels" "typically" "dz" "dy"
#> [5] "but" "this" "need" "not"
#> [9] "be" "the" "case" "generally"
#> [13] "for" "example" "in" "the"
#> [17] "nscg" "acs" "application" "z"
#> [21] "is" "the" "educational" "attainment"
#> [25] "among" "those" "who" "report"
#> [29] "a" "college" "degree" "in"
#> [33] "the" "acs" "which" "has"
#> [37] "dz" "4" "levels" "bachelor’s"
#> [41] "degree" "master’s" "degree" "professional"
#> [45] "degree" "or" "ph" "d"
#> [49] "degree" "and" "y" "is"
#> [53] "the" "educational" "attainment" "in"
#> [57] "the" "nscg" "which" "has"
#> [61] "dy" "5" "levels" "an"
#> [65] "additional" "level" "is" "needed"
#> [69] "because" "some" "individuals" "in"
#> [73] "the" "nscg" "truly" "do"
#> [77] "not" "have" "a" "college"
#> [81] "degree" "for" "all" "i"
#> [85] "de" "let" "ei" "be"
#> [89] "an" "unobserved" "indicator" "of"
#> [93] "a" "reporting" "error" "that"
#> [97] "is" "ei" "1" "when"
#> [101] "yi" "6" "zi" "and"
#> [105] "ei" "0" "otherwise" "using"
#> [109] "e" "enables" "us" "to"
#> [113] "write" "pr" "y" "z"
#> [117] "x" "as" "a" "product"
#> [121] "of" "three" "sub" "models"
#> [125] "for" "individual" "i" "the"
#> [129] "full" "data" "likelihood" "omitting"
#> [133] "parameters" "for" "simplicity" "can"
#> [137] "be" "factored" "as" "pr"
#> [141] "yi" "k" "zi" "l"
#> [145] "xi" "pr" "yi" "k"
#> [149] "xi" "pr" "ei" "e"
#> [153] "yi" "k" "xi" "pr"
#> [157] "zi" "l" "ei" "e"
#> [161] "yi" "k" "xi" "1"
#> [165] "this" "separates" "the" "true"
#> [169] "data" "generation" "process" "and"
#> [173] "the" "measurement" "error" "generation"
#> [177] "process" "which" "facilitates" "model"
#> [181] "specification" "in" "particular" "we"
#> [185] "can" "use" "dg" "to"
#> [189] "estimate" "the" "true" "data"
#> [193] "distribution" "pr" "y" "x"
#> [197] "we" "then" "can" "posit"
#> [201] "different" "models" "for" "the"
#> [205] "rates" "of" "making" "errors"
#> [209] "pr" "ei" "e" "yi"
#> [213] "k" "xi" "and" "for"
#> [217] "the" "reported" "values" "when"
#> [221] "errors" "are" "made" "pr"
#> [225] "zi" "l" "ei" "1"
#> [229] "yi" "k" "xi" "intuitively"
#> [233] "the" "error" "model" "locates"
#> [237] "the" "records" "for" "which"
#> [241] "yi" "6" "zi" "and"
#> [245] "the" "reporting" "model" "captures"
#> [249] "the" "patterns" "of" "misreported"
#> [253] "zi" "of" "course" "when"
#> [257] "ei" "0" "pr" "zi"
#> [261] "yi" "1" "a" "similar"
#> [265] "factorization" "is" "used" "by"
#> [269] "yucel" "and" "zaslavsky" "2005"
#> [273] "he" "et" "al" "2014"
#> [277] "kim" "et" "al" "2015"
#> [281] "and" "manrique" "vallier" "and"
#> [285] "reiter" "2016" "among" "others"
#> [289] "by" "construction" "dg" "and"
#> [293] "de" "cannot" "be" "used"
#> [297] "to" "estimate" "any" "of"
#> [301] "the" "conditional" "probabilities" "pr"
#> [305] "y" "z" "x" "directly"
#> [309] "hence" "we" "have" "to"
#> [313] "restrict" "the" "number" "and"
#> [317] "types" "of" "parameters" "in"
#> [321] "the" "sub" "models" "in"
#> [325] "1" "put" "another" "way"
#> [329] "if" "we" "tried" "to"
#> [333] "estimate" "a" "fully" "8"
#>
#>
#> [[9]]
#> [[9]][[1]]
#> [1] "saturated" "model" "for" "e"
#> [5] "z" "x" "we" "would"
#> [9] "not" "be" "able" "to"
#> [13] "identify" "all" "the" "parameters"
#> [17] "by" "using" "dg" "and"
#> [21] "de" "alone" "to" "see"
#> [25] "this" "assume" "for" "the"
#> [29] "moment" "that" "all" "dx"
#> [33] "πpj" "1" "dj" "possible"
#> [37] "combinations" "of" "x" "are"
#> [41] "present" "in" "dg" "and"
#> [45] "de" "to" "estimate" "the"
#> [49] "distribution" "of" "e" "z"
#> [53] "x" "using" "a" "fully"
#> [57] "saturated" "model" "we" "require"
#> [61] "dy" "1" "dx" "dz"
#> [65] "1" "dy" "dx" "dy"
#> [69] "dz" "1" "dx" "independent"
#> [73] "pieces" "of" "information" "from"
#> [77] "dg" "de" "where" "each"
#> [81] "subtraction" "of" "one" "derives"
#> [85] "from" "the" "requirement" "that"
#> [89] "probabilities" "sum" "to" "one"
#> [93] "however" "dg" "and" "de"
#> [97] "together" "provide" "only" "dz"
#> [101] "1" "dx" "dy" "1"
#> [105] "dx" "dx" "dz" "dy"
#> [109] "1" "dx" "independent" "pieces"
#> [113] "of" "information" "where" "we"
#> [117] "add" "a" "dx" "to"
#> [121] "properly" "account" "for" "the"
#> [125] "sum" "to" "one" "constraint"
#> [129] "a" "key" "insight" "here"
#> [133] "is" "that" "since" "the"
#> [137] "true" "data" "model" "requires"
#> [141] "dy" "dx" "parameters" "to"
#> [145] "estimate" "the" "joint" "distribution"
#> [149] "for" "y" "x" "the"
#> [153] "data" "can" "identify" "at"
#> [157] "most" "dz" "1" "dx"
#> [161] "parameters" "in" "the" "error"
#> [165] "and" "reporting" "models" "combined"
#> [169] "related" "identification" "issues" "arise"
#> [173] "in" "the" "context" "of"
#> [177] "refreshment" "sampling" "to" "adjust"
#> [181] "for" "nonignorable" "attrition" "in"
#> [185] "longitudinal" "studies" "hirano" "et"
#> [189] "al" "2001" "schifeling" "et"
#> [193] "al" "2015" "si" "et"
#> [197] "al" "2015" "3.1" "true"
#> [201] "data" "model" "pr" "yi"
#> [205] "k" "xi" "one" "can"
#> [209] "use" "any" "model" "for"
#> [213] "y" "x" "that" "adequately"
#> [217] "describes" "the" "conditional" "distri"
#> [221] "bution" "such" "as" "a"
#> [225] "multinomial" "logistic" "regression" "in"
#> [229] "the" "nscg" "acs" "application"
#> [233] "we" "use" "a" "fully"
#> [237] "saturated" "multinomial" "model" "accounting"
#> [241] "for" "the" "informative" "sampling"
#> [245] "design" "in" "dg" "using"
#> [249] "the" "approach" "described" "in"
#> [253] "section" "4.1" "one" "also"
#> [257] "could" "use" "a" "joint"
#> [261] "distribution" "for" "y" "x"
#> [265] "such" "as" "a" "log"
#> [269] "linear" "model" "or" "a"
#> [273] "mixture" "of" "multinomials" "model"
#> [277] "dunson" "and" "xing" "2009"
#> [281] "si" "and" "reiter" "2013"
#> [285] "9"
#>
#>
#> [[10]]
#> [[10]][[1]]
#> [1] "3.2" "error" "model" "pr"
#> [5] "ei" "1" "yi" "xi"
#> [9] "in" "cases" "where" "dy"
#> [13] "dz" "a" "generic" "form"
#> [17] "for" "the" "error" "model"
#> [21] "is" "pr" "ei" "1"
#> [25] "xi" "yi" "k" "g"
#> [29] "xi" "yi" "β" "2"
#> [33] "where" "g" "xi" "yi"
#> [37] "β" "is" "some" "function"
#> [41] "of" "its" "arguments" "and"
#> [45] "β" "is" "some" "set"
#> [49] "of" "unknown" "parameters" "a"
#> [53] "convenient" "class" "of" "functions"
#> [57] "that" "we" "use" "here"
#> [61] "is" "the" "logistic" "regression"
#> [65] "of" "ei" "on" "some"
#> [69] "design" "vector" "mi" "derived"
#> [73] "from" "xi" "yi" "with"
#> [77] "corresponding" "coefficients" "β" "the"
#> [81] "analyst" "can" "encode" "different"
#> [85] "versions" "of" "mi" "to"
#> [89] "represent" "assumptions" "about" "the"
#> [93] "error" "process" "the" "simplest"
#> [97] "specification" "is" "to" "set"
#> [101] "each" "mi" "equal" "to"
#> [105] "a" "vector" "of" "ones"
#> [109] "which" "implies" "that" "there"
#> [113] "is" "a" "common" "probability"
#> [117] "of" "error" "for" "all"
#> [121] "individuals" "this" "error" "model"
#> [125] "makes" "sense" "when" "the"
#> [129] "analyst" "believes" "the" "errors"
#> [133] "in" "z" "occur" "completely"
#> [137] "at" "random" "for" "example"
#> [141] "when" "errors" "arise" "simply"
#> [145] "because" "respondents" "accidentally" "and"
#> [149] "randomly" "select" "the" "wrong"
#> [153] "response" "in" "the" "survey"
#> [157] "or" "when" "all" "respondents"
#> [161] "are" "equally" "likely" "to"
#> [165] "misunderstand" "the" "survey" "question"
#> [169] "a" "more" "realistic" "possibility"
#> [173] "is" "to" "allow" "the"
#> [177] "probability" "of" "error" "to"
#> [181] "depend" "on" "some" "variables"
#> [185] "in" "xi" "but" "not"
#> [189] "on" "yi" "e.g" "men"
#> [193] "misreport" "education" "at" "different"
#> [197] "rates" "than" "women" "this"
#> [201] "could" "be" "encoded" "by"
#> [205] "including" "an" "intercept" "for"
#> [209] "one" "of" "the" "sexes"
#> [213] "in" "mi" "finally" "one"
#> [217] "can" "allow" "the" "probability"
#> [221] "of" "error" "to" "depend"
#> [225] "on" "yi" "itself" "for"
#> [229] "example" "people" "who" "truly"
#> [233] "do" "not" "have" "at"
#> [237] "least" "a" "college" "degree"
#> [241] "are" "more" "likely" "to"
#> [245] "misreport" "by" "including" "some"
#> [249] "function" "of" "it" "in"
#> [253] "mi" "in" "the" "case"
#> [257] "where" "dz" "6" "dy"
#> [261] "as" "in" "the" "nscg"
#> [265] "acs" "application" "we" "automatically"
#> [269] "set" "ei" "1" "for"
#> [273] "any" "individual" "with" "yi"
#> [277] "1" "dz" "for" "example"
#> [281] "we" "set" "ei" "1"
#> [285] "for" "all" "individuals" "who"
#> [289] "are" "determined" "in" "the"
#> [293] "nscg" "not" "to" "have"
#> [297] "a" "college" "degree" "but"
#> [301] "report" "so" "in" "the"
#> [305] "acs" "the" "stochastic" "part"
#> [309] "of" "the" "error" "model"
#> [313] "only" "applies" "to" "individuals"
#> [317] "who" "truly" "have" "at"
#> [321] "least" "a" "bachelor’s" "degree"
#> [325] "10"
#>
#>
#> [[11]]
#> [[11]][[1]]
#> [1] "3.3" "reporting" "model" "pr"
#> [5] "zi" "ei" "1" "yi"
#> [9] "xi" "when" "there" "is"
#> [13] "no" "reporting" "error" "for"
#> [17] "individual" "i" "i.e" "ei"
#> [21] "0" "we" "know" "that"
#> [25] "zi" "yi" "when" "there"
#> [29] "is" "a" "reporting" "error"
#> [33] "we" "must" "model" "the"
#> [37] "reported" "value" "zi" "as"
#> [41] "with" "2" "one" "can"
#> [45] "posit" "a" "variety" "of"
#> [49] "distributions" "for" "the" "reporting"
#> [53] "error" "which" "is" "some"
#> [57] "function" "h" "xi" "yi"
#> [61] "α" "with" "parameters" "α"
#> [65] "we" "now" "describe" "a"
#> [69] "few" "reporting" "error" "models"
#> [73] "for" "illustration" "one" "could"
#> [77] "use" "more" "complicated" "models"
#> [81] "e.g" "based" "on" "multinomial"
#> [85] "logistic" "regression" "as" "well"
#> [89] "a" "simple" "model" "assumes"
#> [93] "that" "values" "of" "zi"
#> [97] "are" "equally" "likely" "as"
#> [101] "in" "manrique" "vallier" "and"
#> [105] "reiter" "2016" "we" "have"
#> [109] "1" "dz" "1" "if"
#> [113] "l" "6" "k" "k"
#> [117] "1" "dz" "pr" "zi"
#> [121] "l" "xi" "yi" "k"
#> [125] "ei" "1" "1" "dz"
#> [129] "if" "k" "1" "dz"
#> [133] "3" "0" "otherwise" "such"
#> [137] "a" "reporting" "model" "could"
#> [141] "be" "reasonable" "when" "reporting"
#> [145] "errors" "are" "due" "to"
#> [149] "clerical" "errors" "we" "note"
#> [153] "that" "this" "model" "does"
#> [157] "not" "accurately" "characterize" "the"
#> [161] "reporting" "errors" "in" "the"
#> [165] "1993" "linked" "nscg" "data"
#> [169] "per" "table" "1" "alternatively"
#> [173] "one" "can" "allow" "the"
#> [177] "probabilities" "to" "depend" "on"
#> [181] "yi" "so" "that" "zi"
#> [185] "xi" "yi" "k" "ei"
#> [189] "1" "categorical" "pk" "1"
#> [193] "pk" "dz" "4" "where"
#> [197] "each" "pk" "l" "is"
#> [201] "the" "probability" "of" "reporting"
#> [205] "z" "l" "given" "that"
#> [209] "y" "k" "and" "pk"
#> [213] "k" "0" "one" "can"
#> [217] "further" "parameterize" "the" "reporting"
#> [221] "model" "so" "that" "the"
#> [225] "reporting" "probabilities" "vary" "with"
#> [229] "x" "for" "example" "to"
#> [233] "make" "the" "probabilities" "vary"
#> [237] "with" "sex" "and" "true"
#> [241] "education" "11"
#>
#>
#> [[12]]
#> [[12]][[1]]
#> [1] "values" "we" "can" "use"
#> [5] "categorical" "pm" "k" "1"
#> [9] "pm" "k" "dz" "if"
#> [13] "xi" "sex" "m" "zi"
#> [17] "xi" "yi" "k" "ei"
#> [21] "1" "5" "categorical" "p"
#> [25] "f" "k" "1" "pf"
#> [29] "k" "dz" "if" "xi"
#> [33] "sex" "f" "3.4" "specifying"
#> [37] "and" "estimating" "the" "model"
#> [41] "as" "apparent" "in" "sections"
#> [45] "3.2" "and" "3.3" "the"
#> [49] "error" "and" "reporting" "models"
#> [53] "can" "take" "on" "many"
#> [57] "specifications" "without" "linked" "data"
#> [61] "analysts" "cannot" "use" "exploratory"
#> [65] "data" "analysis" "to" "inform"
#> [69] "the" "model" "choice" "instead"
#> [73] "we" "recommend" "that" "analysts"
#> [77] "posit" "scientifically" "defensible" "measurement"
#> [81] "error" "models" "and" "make"
#> [85] "post" "hoc" "checks" "of"
#> [89] "the" "sensibility" "of" "analyses"
#> [93] "from" "those" "models" "we"
#> [97] "demonstrate" "this" "approach" "in"
#> [101] "section" "4" "for" "example"
#> [105] "analysts" "can" "check" "whether"
#> [109] "or" "not" "the" "predicted"
#> [113] "probabilities" "of" "errors" "implied"
#> [117] "by" "the" "model" "seem"
#> [121] "plausible" "as" "another" "diagnostic"
#> [125] "analysts" "can" "compare" "the"
#> [129] "distribution" "of" "the" "imputed"
#> [133] "values" "of" "y" "x"
#> [137] "in" "de" "to" "the"
#> [141] "empirical" "distribution" "of" "y"
#> [145] "x" "in" "dg" "this"
#> [149] "is" "akin" "to" "diagnostics"
#> [153] "in" "multiple" "imputation" "for"
#> [157] "missing" "data" "that" "compare"
#> [161] "imputed" "and" "observed" "values"
#> [165] "abayomi" "et" "al" "2008"
#> [169] "when" "these" "distributions" "differ"
#> [173] "substantially" "it" "suggests" "the"
#> [177] "measurement" "error" "model" "specification"
#> [181] "or" "possibly" "the" "true"
#> [185] "data" "model" "is" "inadequate"
#> [189] "such" "diagnostic" "checks" "only"
#> [193] "can" "reveal" "problems" "with"
#> [197] "the" "model" "specification" "they"
#> [201] "do" "not" "indicate" "that"
#> [205] "a" "particular" "specification" "is"
#> [209] "correct" "more" "generally" "it"
#> [213] "is" "prudent" "to" "keep"
#> [217] "the" "restrictions" "on" "the"
#> [221] "number" "of" "identifiable" "parameters"
#> [225] "in" "mind" "when" "specifying"
#> [229] "the" "models" "at" "most"
#> [233] "one" "can" "identify" "the"
#> [237] "equiv" "alent" "of" "dz"
#> [241] "1" "dx" "parameters" "in"
#> [245] "the" "combined" "model" "for"
#> [249] "ei" "zi" "xi" "generally"
#> [253] "for" "ease" "of" "specification"
#> [257] "and" "interpretation" "we" "favor"
#> [261] "rich" "error" "models" "e.g"
#> [265] "with" "mi" "including" "variables"
#> [269] "in" "xi" "and" "yi"
#> [273] "coupled" "with" "simple" "reporting"
#> [277] "models" "like" "those" "in"
#> [281] "section" "3.3" "the" "exact"
#> [285] "strategy" "for" "estimating" "the"
#> [289] "model" "depends" "on" "the"
#> [293] "features" "of" "dg" "and"
#> [297] "de" "12"
#>
#>
#> [[13]]
#> [[13]][[1]]
#> [1] "when" "both" "datasets" "can"
#> [5] "be" "treated" "as" "simple"
#> [9] "random" "samples" "we" "suggest"
#> [13] "using" "a" "fully" "bayesian"
#> [17] "approach" "after" "concatenating" "dg"
#> [21] "and" "de" "here" "one"
#> [25] "can" "use" "typical" "prior"
#> [29] "distributions" "for" "the" "true"
#> [33] "data" "and" "error" "models"
#> [37] "for" "reporting" "models" "like"
#> [41] "those" "in" "4" "and"
#> [45] "5" "it" "is" "convenient"
#> [49] "to" "use" "independent" "dirichlet"
#> [53] "priors" "for" "each" "pk"
#> [57] "1" "pk" "k" "1"
#> [61] "pk" "k" "1" "pk"
#> [65] "dz" "in" "the" "nscg"
#> [69] "acs" "application" "we" "create"
#> [73] "prior" "distributions" "for" "the"
#> [77] "reporting" "models" "using" "the"
#> [81] "information" "from" "table" "1"
#> [85] "absent" "such" "information" "analysts"
#> [89] "can" "use" "uniform" "prior"
#> [93] "distributions" "when" "it" "does"
#> [97] "not" "make" "sense" "to"
#> [101] "concatenate" "dg" "and" "de"
#> [105] "it" "can" "be" "convenient"
#> [109] "to" "use" "a" "multi"
#> [113] "stage" "estimation" "strategy" "when"
#> [117] "imputing" "missing" "y" "in"
#> [121] "de" "all" "of" "the"
#> [125] "information" "needed" "from" "dg"
#> [129] "is" "represented" "by" "the"
#> [133] "parameters" "of" "the" "true"
#> [137] "data" "model" "θ" "hence"
#> [141] "we" "first" "can" "construct"
#> [145] "a" "possibly" "approximate" "posterior"
#> [149] "distribution" "of" "θ" "using"
#> [153] "only" "dg" "we" "then"
#> [157] "sample" "many" "draws" "from"
#> [161] "this" "distribution" "we" "plug"
#> [165] "these" "draws" "in" "the"
#> [169] "gibbs" "sampling" "steps" "for"
#> [173] "a" "bayesian" "predictive" "distribution"
#> [177] "for" "yi" "zi" "xi"
#> [181] "θ" "for" "the" "cases"
#> [185] "in" "de" "thereby" "generating"
#> [189] "the" "multiple" "imputations" "we"
#> [193] "describe" "the" "gibbs" "sampler"
#> [197] "for" "this" "step" "for"
#> [201] "the" "nscg" "acs" "application"
#> [205] "in" "the" "supplementary" "material"
#> [209] "4" "adjusting" "for" "reporting"
#> [213] "errors" "in" "education" "in"
#> [217] "the" "2010" "acs" "we"
#> [221] "now" "use" "the" "framework"
#> [225] "to" "adjust" "inferences" "for"
#> [229] "potential" "reporting" "error" "in"
#> [233] "educa" "tional" "attainment" "in"
#> [237] "the" "2010" "acs" "using"
#> [241] "the" "public" "use" "microdata"
#> [245] "for" "the" "2010" "nscg"
#> [249] "as" "the" "gold" "standard"
#> [253] "file" "dg" "we" "consider"
#> [257] "two" "main" "analyses" "that"
#> [261] "could" "be" "affected" "by"
#> [265] "reporting" "error" "in" "education"
#> [269] "first" "we" "estimate" "from"
#> [273] "the" "acs" "the" "number"
#> [277] "of" "science" "and" "engineering"
#> [281] "degrees" "awarded" "to" "women"
#> [285] "we" "base" "the" "estimate"
#> [289] "on" "an" "indicator" "in"
#> [293] "the" "acs" "for" "whether"
#> [297] "or" "not" "each" "individual"
#> [301] "has" "such" "a" "degree"
#> [305] "second" "we" "examine" "13"
#>
#>
#> [[14]]
#> [[14]][[1]]
#> [1] "average" "incomes" "across" "degrees"
#> [5] "this" "focus" "is" "motivated"
#> [9] "in" "part" "by" "the"
#> [13] "findings" "of" "black" "et"
#> [17] "al" "2006" "2008" "who"
#> [21] "found" "that" "apparent" "wage"
#> [25] "gaps" "in" "the" "1990"
#> [29] "census" "long" "form" "data"
#> [33] "could" "be" "explained" "by"
#> [37] "reporting" "errors" "in" "education"
#> [41] "as" "de" "we" "use"
#> [45] "the" "subset" "of" "acs"
#> [49] "microdata" "that" "includes" "only"
#> [53] "individuals" "who" "reported" "a"
#> [57] "bachelor’s" "degree" "or" "higher"
#> [61] "and" "are" "under" "age"
#> [65] "76" "the" "resulting" "sample"
#> [69] "size" "is" "ne" "600"
#> [73] "150" "in" "x" "we"
#> [77] "include" "gender" "age" "group"
#> [81] "24" "and" "younger" "25"
#> [85] "39" "40" "54" "and"
#> [89] "55" "and" "older" "and"
#> [93] "an" "indicator" "for" "whether"
#> [97] "the" "individual’s" "race" "is"
#> [101] "black" "or" "something" "else"
#> [105] "in" "the" "nscg" "we"
#> [109] "discarded" "38" "records" "with"
#> [113] "race" "suppressed" "leaving" "a"
#> [117] "sample" "size" "of" "ng"
#> [121] "77" "150" "we" "consider"
#> [125] "two" "sets" "of" "measurement"
#> [129] "error" "model" "specifications" "the"
#> [133] "first" "set" "uses" "specifications"
#> [137] "like" "those" "in" "section"
#> [141] "3" "with" "flat" "prior"
#> [145] "distributions" "for" "all" "parameters"
#> [149] "we" "use" "this" "set"
#> [153] "to" "illustrate" "model" "diagnostics"
#> [157] "and" "sensitivity" "analysis" "absent"
#> [161] "prior" "information" "about" "the"
#> [165] "measurement" "error" "process" "the"
#> [169] "second" "set" "uses" "a"
#> [173] "common" "error" "and" "reporting"
#> [177] "model" "with" "different" "informative"
#> [181] "prior" "distributions" "on" "its"
#> [185] "parameters" "we" "construct" "these"
#> [189] "informative" "prior" "distributions" "based"
#> [193] "on" "the" "analysis" "of"
#> [197] "the" "1993" "linked" "file"
#> [201] "for" "all" "specifications" "considered"
#> [205] "we" "create" "m" "50"
#> [209] "multiple" "imputations" "of" "the"
#> [213] "plausible" "true" "education" "values"
#> [217] "in" "the" "2010" "acs"
#> [221] "which" "we" "then" "analyze"
#> [225] "using" "the" "methods" "of"
#> [229] "rubin" "1987" "for" "all"
#> [233] "specifications" "the" "true" "data"
#> [237] "model" "is" "a" "saturated"
#> [241] "multinomial" "distribution" "for" "the"
#> [245] "five" "values" "of" "y"
#> [249] "for" "each" "combination" "of"
#> [253] "x" "we" "begin" "by"
#> [257] "describing" "how" "we" "estimate"
#> [261] "the" "parameters" "of" "the"
#> [265] "true" "data" "distribution" "accounting"
#> [269] "for" "the" "informative" "sampling"
#> [273] "design" "of" "the" "nscg"
#> [277] "4.1" "accounting" "for" "informative"
#> [281] "sampling" "design" "of" "nscg"
#> [285] "the" "2010" "nscg" "uses"
#> [289] "reported" "education" "in" "the"
#> [293] "2010" "acs" "as" "a"
#> [297] "stratification" "variable" "fesco" "et"
#> [301] "al" "2012" "finamore" "2013"
#> [305] "its" "unweighted" "percentages" "can"
#> [309] "over" "represent" "14"
#>
#>
#> [[15]]
#> [[15]][[1]]
#> [1] "or" "under" "represent" "degree" "types"
#> [6] "in" "the" "population" "this" "is"
#> [11] "most" "obviously" "the" "case" "for"
#> [16] "individuals" "without" "a" "college" "degree"
#> [21] "yi" "5" "we" "need" "to"
#> [26] "account" "for" "this" "informative" "sampling"
#> [31] "when" "estimating" "parameters" "of" "the"
#> [36] "true" "data" "model" "we" "do"
#> [41] "so" "with" "a" "two" "stage"
#> [46] "approach" "first" "we" "use" "survey"
#> [51] "weighted" "inferences" "to" "estimate" "population"
#> [56] "totals" "of" "y" "x" "from"
#> [61] "the" "2010" "nscg" "second" "we"
#> [66] "turn" "these" "estimates" "into" "an"
#> [71] "approximate" "bayesian" "posterior" "distribution" "for"
#> [76] "input" "to" "fitting" "the" "measurement"
#> [81] "error" "models" "used" "to" "impute"
#> [86] "plausible" "values" "of" "yi" "for"
#> [91] "individuals" "in" "the" "acs" "we"
#> [96] "now" "describe" "this" "process" "which"
#> [101] "can" "be" "used" "generally" "when"
#> [106] "dg" "is" "collected" "via" "a"
#> [111] "complex" "survey" "design" "suppose" "for"
#> [116] "the" "moment" "that" "dy" "dz"
#> [121] "this" "is" "not" "the" "case"
#> [126] "when" "de" "is" "the" "acs"
#> [131] "where" "dz" "4" "and" "dg"
#> [136] "is" "the" "nscg" "where" "dy"
#> [141] "5" "however" "we" "start" "here"
#> [146] "to" "fix" "ideas" "for" "all"
#> [151] "possible" "combinations" "x" "let" "θxk"
#> [156] "pr" "y" "k" "x" "x"
#> [161] "and" "let" "θx" "θx1" "θxdy"
#> [166] "we" "seek" "to" "use" "dg"
#> [171] "to" "specify" "f" "θ" "x"
#> [176] "y" "to" "do" "so" "we"
#> [181] "first" "parameterize" "θxk" "txk" "dj"
#> [186] "1" "py" "txj" "where" "txk"
#> [191] "is" "the" "population" "count" "of"
#> [196] "individuals" "with" "xi" "x" "yi"
#> [201] "k" "we" "estimate" "tx" "tx1"
#> [206] "txdy" "and" "the" "associated" "covariance"
#> [211] "matrix" "of" "the" "estimator" "using"
#> [216] "standard" "survey" "weighted" "estimation" "let"
#> [221] "wi" "be" "the" "sample" "weight"
#> [226] "for" "all" "i" "dg" "we"
#> [231] "compute" "the" "estimated" "total" "and"
#> [236] "associated" "variance" "for" "each" "x"
#> [241] "and" "k" "as" "ng" "x"
#> [246] "t̂xk" "wi" "i" "xi" "x"
#> [251] "yi" "k" "6" "i" "1"
#> [256] "ng" "2" "d" "t̂xk" "ng"
#> [261] "t̂xk" "x" "var" "wi" "i"
#> [266] "xi" "x" "yi" "k" "7"
#> [271] "ng" "1" "i" "1" "ng"
#> [276] "15"
#>
#>
#> [[16]]
#> [[16]][[1]]
#> [1] "for" "each" "k" "and" "l"
#> [6] "with" "l" "6" "k" "we"
#> [11] "also" "compute" "the" "estimated" "covariance"
#> [16] "ng" "ng" "x" "t̂xk" "cov"
#> [21] "t̂xk" "t̂xl" "d" "wi" "i"
#> [26] "xi" "x" "yi" "k" "ng"
#> [31] "1" "i" "1" "ng" "t̂xl"
#> [36] "wi" "i" "xi" "x" "yi"
#> [41] "l" "8" "ng" "the" "variance"
#> [46] "and" "covariance" "estimators" "are" "the"
#> [51] "design" "based" "estimators" "for" "probability"
#> [56] "proportional" "to" "size" "sampling" "with"
#> [61] "replacement" "as" "is" "typical" "of"
#> [66] "multi" "stage" "complex" "surveys" "lohr"
#> [71] "2010" "switching" "now" "to" "a"
#> [76] "bayesian" "modeling" "perspective" "we" "assume"
#> [81] "that" "tx" "log" "normal" "µx"
#> [86] "τx" "so" "as" "to" "ensure"
#> [91] "a" "distribution" "with" "positive" "values"
#> [96] "for" "all" "true" "totals" "we"
#> [101] "select" "µx" "τx" "so" "that"
#> [106] "each" "e" "txk" "t̂xk" "and"
#> [111] "var" "tx" "σ̂" "t̂x" "the"
#> [116] "estimated" "covariance" "matrix" "with" "elements"
#> [121] "defined" "by" "7" "and" "8"
#> [126] "these" "are" "derived" "from" "moment"
#> [131] "matching" "tarmast" "2001" "we" "have"
#> [136] "µxj" "log" "t̂xj" "τx" "j"
#> [141] "j" "2" "9" "2" "τx"
#> [146] "j" "j" "log" "1" "σ̂x"
#> [151] "j" "j" "t̂xj" "10" "τx"
#> [156] "j" "i" "log" "1" "σ̂x"
#> [161] "j" "i" "t̂xj" "t̂xi" "11"
#> [166] "where" "the" "notation" "j" "i"
#> [171] "denotes" "an" "element" "in" "row"
#> [176] "j" "and" "column" "i" "of"
#> [181] "the" "matrix" "we" "draw" "tx"
#> [186] "from" "this" "log" "normal" "distribution"
#> [191] "and" "transform" "to" "draws" "θx"
#> [196] "since" "the" "2010" "nscg" "does"
#> [201] "not" "include" "individuals" "who" "claim"
#> [206] "in" "the" "acs" "to" "have"
#> [211] "less" "than" "a" "bachelor’s" "degree"
#> [216] "we" "cannot" "use" "dg" "directly"
#> [221] "to" "estimate" "tx5" "instead" "we"
#> [226] "estimate" "tx" "tx1" "tx2" "tx3"
#> [231] "tx4" "tx5" "using" "the" "acs"
#> [236] "data" "and" "estimate" "tx1" "tx2"
#> [241] "tx3" "tx4" "from" "the" "nscg"
#> [246] "using" "the" "method" "described" "previously"
#> [251] "this" "leads" "to" "an" "estimate"
#> [256] "for" "tx5" "more" "precisely" "let"
#> [261] "the" "acs" "design" "based" "estimator"
#> [266] "for" "tx" "16"
#>
#>
#> [[17]]
#> [[17]][[1]]
#> [1] "table" "2" "summary" "of"
#> [5] "the" "first" "four" "measurement"
#> [9] "error" "model" "specifications" "for"
#> [13] "2010" "nscg" "acs" "analysis"
#> [17] "these" "models" "use" "flat"
#> [21] "prior" "distributions" "on" "all"
#> [25] "parameters" "error" "model" "reporting"
#> [29] "model" "expression" "for" "mit"
#> [33] "β" "p" "r" "zi"
#> [37] "yi" "k" "ei" "1"
#> [41] "p4" "model" "1" "β1"
#> [45] "k" "2" "βk" "i"
#> [49] "yi" "k" "categorical" "pk"
#> [53] "1" "pk" "4" "p4"
#> [57] "m" "model" "2" "β1"
#> [61] "k" "2" "βk" "i"
#> [65] "yi" "k" "xi" "sex"
#> [69] "m" "categorical" "pk" "1"
#> [73] "pk" "4" "p4" "no"
#> [77] "model" "3" "β1" "k"
#> [81] "2" "βk" "i" "yi"
#> [85] "k" "xi" "black" "no"
#> [89] "categorical" "pk" "1" "pk"
#> [93] "4" "p4" "yes" "k"
#> [97] "1" "βk" "i" "yi"
#> [101] "k" "xi" "black" "yes"
#> [105] "p4" "m" "model" "4"
#> [109] "β1" "k" "2" "βk"
#> [113] "i" "yi" "k" "xi"
#> [117] "sex" "m" "categorical" "pm"
#> [121] "k" "1" "pm" "k"
#> [125] "4" "if" "xi" "sex"
#> [129] "m" "p4" "f" "k"
#> [133] "1" "βk" "i" "yi"
#> [137] "k" "xi" "sex" "f"
#> [141] "categorical" "pf" "k" "1"
#> [145] "pf" "k" "4" "if"
#> [149] "xi" "sex" "f" "be"
#> [153] "t̂x" "with" "design" "based"
#> [157] "variance" "estimate" "σ̂" "2"
#> [161] "t̂x" "we" "sample" "a"
#> [165] "value" "tx" "normal" "t̂x"
#> [169] "σ̂" "2" "t̂x" "using"
#> [173] "an" "independent" "sample" "of"
#> [177] "values" "of" "tx1" "tx4"
#> [181] "from" "4j" "1" "txj"
#> [185] "and" "set" "tx" "tx1"
#> [189] "p" "the" "nscg" "we"
#> [193] "compute" "tx5" "tx" "tx5"
#> [197] "we" "repeat" "these" "steps"
#> [201] "10,000" "times" "we" "then"
#> [205] "compute" "the" "mean" "and"
#> [209] "covariance" "matrix" "of" "the"
#> [213] "10,000" "draws" "which" "we"
#> [217] "again" "plug" "into" "9"
#> [221] "11" "the" "resulting" "log"
#> [225] "normal" "distri" "bution" "is"
#> [229] "the" "approximate" "posterior" "distribution"
#> [233] "of" "θx" "we" "include"
#> [237] "an" "example" "of" "this"
#> [241] "entire" "procedure" "in" "the"
#> [245] "supplementary" "material" "4.2" "measurement"
#> [249] "error" "models" "the" "two"
#> [253] "sets" "of" "measurement" "error"
#> [257] "models" "include" "four" "that"
#> [261] "use" "flat" "prior" "distributions"
#> [265] "and" "three" "that" "use"
#> [269] "informative" "prior" "distributions" "based"
#> [273] "on" "the" "1993" "linked"
#> [277] "data" "for" "all" "error"
#> [281] "models" "we" "use" "a"
#> [285] "logistic" "regression" "of" "ei"
#> [289] "on" "various" "main" "effects"
#> [293] "and" "interactions" "of" "yi"
#> [297] "and" "xi" "for" "all"
#> [301] "reporting" "models" "we" "use"
#> [305] "categorical" "distributions" "with" "probabilities"
#> [309] "that" "depend" "on" "yi"
#> [313] "and" "possibly" "xi" "the"
#> [317] "four" "models" "with" "flat"
#> [321] "prior" "distributions" "are" "summarized"
#> [325] "in" "table" "2" "in"
#> [329] "model" "1" "the" "error"
#> [333] "and" "reporting" "models" "depend"
#> [337] "only" "on" "17"
#>
#>
#> [[18]]
#> [[18]][[1]]
#> [1] "table" "3" "summary" "of"
#> [5] "informative" "prior" "specifications" "for"
#> [9] "2010" "nscg" "acs" "analysis"
#> [13] "for" "males" "with" "bachelor’s"
#> [17] "degrees" "error" "rate" "reporting"
#> [21] "probabilities" "pm" "1" "2"
#> [25] "pm" "1" "3" "pm"
#> [29] "1" "4" "model" "4"
#> [33] "beta" "1" "1" "dirichlet"
#> [37] "1" "1" "1" "model"
#> [41] "5" "beta" "76" "14.24"
#> [45] "dirichlet" "3.54" "1.27" "0.19"
#> [49] "model" "6" "beta" "2724.2"
#> [53] "50862" "dirichlet" "2235.3" "799.7"
#> [57] "123.1" "model" "7" "beta"
#> [61] "500" "99500" "dirichlet" "1"
#> [65] "1" "1" "yi" "model"
#> [69] "2" "and" "3" "keep"
#> [73] "the" "reporting" "model" "as"
#> [77] "in" "4" "but" "expand"
#> [81] "the" "error" "model" "in"
#> [85] "model" "2" "the" "probability"
#> [89] "of" "a" "reporting" "error"
#> [93] "can" "vary" "with" "yi"
#> [97] "and" "sex" "xi" "sex"
#> [101] "in" "model" "3" "error"
#> [105] "probabilities" "can" "vary" "with"
#> [109] "yi" "and" "the" "indicator"
#> [113] "for" "black" "race" "xi"
#> [117] "black" "in" "model" "4"
#> [121] "the" "error" "and" "reporting"
#> [125] "models" "both" "depend" "on"
#> [129] "y" "and" "sex" "for"
#> [133] "models" "5" "7" "we"
#> [137] "use" "the" "specification" "in"
#> [141] "model" "4" "and" "incorporate"
#> [145] "prior" "in" "formation" "about"
#> [149] "the" "measurement" "errors" "from"
#> [153] "the" "1993" "linked" "data"
#> [157] "in" "constructing" "the" "priors"
#> [161] "we" "first" "remove" "records"
#> [165] "that" "have" "been" "flagged"
#> [169] "as" "having" "missing" "education"
#> [173] "that" "has" "been" "imputed"
#> [177] "because" "these" "imputations" "might"
#> [181] "not" "closely" "reflect" "the"
#> [185] "actual" "education" "values" "black"
#> [189] "et" "al" "2003" "table"
#> [193] "3" "displays" "the" "prior"
#> [197] "distributions" "for" "males" "with"
#> [201] "bachelor’s" "degrees" "details" "on"
#> [205] "how" "we" "arrive" "at"
#> [209] "these" "and" "other" "groups"
#> [213] "prior" "specifications" "are" "in"
#> [217] "the" "supplementary" "material" "here"
#> [221] "we" "summarize" "briefly" "x"
#> [225] "for" "model" "5" "we"
#> [229] "set" "the" "prior" "distributions"
#> [233] "for" "each" "βk" "so"
#> [237] "that" "the" "error" "rates"
#> [241] "are" "centered" "at" "the"
#> [245] "estimate" "from" "the" "1993"
#> [249] "linked" "data" "we" "also"
#> [253] "require" "the" "central" "95"
#> [257] "probability" "interval" "of" "the"
#> [261] "prior" "distribution" "on" "each"
#> [265] "error" "rate" "to" "be"
#> [269] "close" "to" "005" "20"
#> [273] "allowing" "for" "a" "wide"
#> [277] "but" "not" "unrealistic" "range"
#> [281] "of" "possible" "error" "rates"
#> [285] "for" "the" "reporting" "probabilities"
#> [289] "pm" "k" "z" "and"
#> [293] "pf" "k" "z" "we"
#> [297] "center" "most" "of" "the"
#> [301] "prior" "distributions" "at" "the"
#> [305] "corresponding" "estimates" "from" "the"
#> [309] "1993" "linked" "data" "we"
#> [313] "require" "the" "central" "95"
#> [317] "probability" "interval" "of" "each"
#> [321] "prior" "distribution" "to" "have"
#> [325] "support" "on" "values" "of"
#> [329] "p" "k" "z" "within"
#> [333] "10" "of" "the" "1993"
#> [337] "point" "estimate" "truncating" "at"
#> [341] "zero" "or" "one" "as"
#> [345] "needed" "one" "exception" "is"
#> [349] "18"
#>
#>
#> [[19]]
#> [[19]][[1]]
#> [1] "the" "reporting" "probabilities" "for"
#> [5] "those" "with" "no" "college"
#> [9] "degree" "who" "report" "professional"
#> [13] "degree" "which" "we" "center"
#> [17] "at" "half" "the" "1993"
#> [21] "estimate" "the" "census" "bureau"
#> [25] "has" "improved" "the" "clarity"
#> [29] "of" "the" "definition" "of"
#> [33] "professional" "in" "the" "20"
#> [37] "years" "since" "the" "1990"
#> [41] "long" "form" "as" "discussed"
#> [45] "in" "the" "prior" "specification"
#> [49] "section" "of" "the" "supplementary"
#> [53] "material" "for" "model" "6"
#> [57] "we" "use" "the" "same"
#> [61] "prior" "means" "as" "in"
#> [65] "model" "5" "for" "both"
#> [69] "error" "and" "re" "porting"
#> [73] "models" "however" "we" "substantially"
#> [77] "tighten" "the" "prior" "distributions"
#> [81] "to" "make" "the" "prior"
#> [85] "variance" "accord" "with" "the"
#> [89] "uncertainty" "in" "the" "point"
#> [93] "estimates" "from" "the" "1993"
#> [97] "linked" "data" "we" "do"
#> [101] "so" "by" "using" "prior"
#> [105] "sample" "sizes" "that" "match"
#> [109] "those" "from" "the" "1993"
#> [113] "nscg" "for" "example" "the"
#> [117] "1993" "nscg" "included" "53,586"
#> [121] "males" "with" "bachelor’s" "degrees"
#> [125] "excluding" "those" "records" "who"
#> [129] "had" "their" "census" "education"
#> [133] "imputed" "we" "therefore" "use"
#> [137] "beta" "2724.2" "50862" "as"
#> [141] "the" "prior" "distribution" "for"
#> [145] "the" "error" "rate" "for"
#> [149] "this" "x" "we" "similarly"
#> [153] "increase" "the" "prior" "sample"
#> [157] "sizes" "for" "the" "reporting"
#> [161] "probabilities" "to" "match" "the"
#> [165] "1993" "nscg" "sample" "sizes"
#> [169] "model" "7" "departs" "from"
#> [173] "the" "1993" "linked" "data"
#> [177] "estimates" "and" "encodes" "a"
#> [181] "strong" "prior" "belief" "that"
#> [185] "almost" "no" "one" "misreports"
#> [189] "their" "education" "except" "for"
#> [193] "haphazard" "mistakes" "here" "we"
#> [197] "set" "the" "prior" "mean"
#> [201] "for" "the" "probability" "of"
#> [205] "misreporting" "education" "to" "005"
#> [209] "for" "all" "demographic" "groups"
#> [213] "we" "use" "a" "prior"
#> [217] "sample" "size" "of" "100,000"
#> [221] "making" "the" "prior" "distribution"
#> [225] "concentrate" "strongly" "around" "005"
#> [229] "for" "the" "reporting" "probabilities"
#> [233] "we" "use" "a" "non"
#> [237] "informative" "prior" "distribution" "for"
#> [241] "convenience" "since" "the" "estimates"
#> [245] "of" "the" "reporting" "probabilities"
#> [249] "are" "strongly" "influenced" "by"
#> [253] "the" "concentrated" "prior" "distributions"
#> [257] "on" "the" "error" "rates"
#> [261] "finally" "for" "comparison" "purposes"
#> [265] "we" "also" "fit" "the"
#> [269] "model" "based" "on" "a"
#> [273] "conditional" "independence" "assumption" "cia"
#> [277] "to" "impute" "yi" "for"
#> [281] "individuals" "in" "the" "acs"
#> [285] "under" "the" "cia" "we"
#> [289] "sample" "θ" "and" "then"
#> [293] "impute" "y" "θ" "x"
#> [297] "from" "the" "true" "data"
#> [301] "model" "here" "we" "do"
#> [305] "not" "use" "the" "reported"
#> [309] "value" "of" "zi" "in"
#> [313] "the" "imputations" "19"
#>
#>
#> [[20]]
#> [[20]][[1]]
#> [1] "4.3" "empirical" "results" "we"
#> [5] "first" "examine" "what" "each"
#> [9] "model" "suggests" "about" "the"
#> [13] "extent" "and" "nature" "of"
#> [17] "the" "mea" "surement" "errors"
#> [21] "in" "the" "2010" "acs"
#> [25] "we" "then" "use" "the"
#> [29] "models" "to" "assess" "sensitivity"
#> [33] "of" "results" "about" "the"
#> [37] "substantive" "questions" "related" "to"
#> [41] "number" "of" "degrees" "and"
#> [45] "income" "4.3.1" "distributions" "of"
#> [49] "errors" "in" "reported" "acs"
#> [53] "education" "values" "table" "4"
#> [57] "displays" "the" "multiple" "imputation"
#> [61] "point" "estimates" "and" "95"
#> [65] "confidence" "intervals" "for" "the"
#> [69] "proportions" "of" "errors" "by"
#> [73] "gender" "and" "nscg" "education"
#> [77] "obtained" "from" "the" "m"
#> [81] "50" "draws" "of" "ei"
#> [85] "for" "all" "individuals" "in"
#> [89] "de" "we" "begin" "by"
#> [93] "comparing" "results" "for" "the"
#> [97] "set" "of" "models" "with"
#> [101] "flat" "prior" "distributions" "models"
#> [105] "1" "4" "and" "the"
#> [109] "cia" "model" "then" "move"
#> [113] "to" "the" "set" "of"
#> [117] "models" "with" "informative" "prior"
#> [121] "distributions" "models" "5" "7"
#> [125] "the" "cia" "model" "suggests"
#> [129] "extremely" "large" "error" "percentages"
#> [133] "especially" "for" "the" "highest"
#> [137] "education" "levels" "these" "rates"
#> [141] "seem" "unlikely" "to" "be"
#> [145] "reality" "leading" "us" "to"
#> [149] "reject" "the" "cia" "model"
#> [153] "the" "overall" "error" "rates"
#> [157] "for" "models" "1" "4"
#> [161] "are" "similar" "and" "more"
#> [165] "realistic" "than" "those" "from"
#> [169] "the" "cia" "model" "the"
#> [173] "differences" "in" "error" "estimates"
#> [177] "between" "model" "2" "and"
#> [181] "model" "1" "suggest" "that"
#> [185] "the" "probability" "of" "error"
#> [189] "depends" "on" "sex" "comparing"
#> [193] "results" "for" "model" "3"
#> [197] "and" "model" "1" "however"
#> [201] "we" "see" "little" "evidence"
#> [205] "of" "important" "race" "effects"
#> [209] "on" "the" "propensity" "to"
#> [213] "make" "errors" "model" "4"
#> [217] "generalizes" "model" "2" "by"
#> [221] "allowing" "the" "reporting" "probabilities"
#> [225] "to" "vary" "by" "sex"
#> [229] "if" "these" "probabilities" "were"
#> [233] "similar" "across" "sex" "in"
#> [237] "reality" "we" "would" "expect"
#> [241] "the" "two" "models" "to"
#> [245] "produce" "similar" "results" "however"
#> [249] "the" "estimated" "error" "rates"
#> [253] "are" "fairly" "different" "for"
#> [257] "example" "the" "estimated" "proportion"
#> [261] "of" "errors" "for" "female"
#> [265] "professionals" "from" "model" "4"
#> [269] "is" "about" "double" "that"
#> [273] "from" "model" "2" "to"
#> [277] "determine" "where" "the" "models"
#> [281] "differ" "most" "we" "examine"
#> [285] "the" "estimated" "reporting" "probabilities"
#> [289] "displayed" "in" "table" "5"
#> [293] "model" "4" "estimates" "some"
#> [297] "significant" "differences" "in" "reporting"
#> [301] "probabilities" "by" "gender" "for"
#> [305] "example" "20"
#>
#>
#> [[21]]
#> [[21]][[1]]
#> [1] "males" "with" "bachelor’s" "degrees"
#> [5] "who" "make" "a" "reporting"
#> [9] "error" "are" "estimated" "to"
#> [13] "report" "a" "master’s" "degree"
#> [17] "with" "probability" "96" "whereas"
#> [21] "females" "with" "bachelor’s" "degrees"
#> [25] "who" "make" "a" "reporting"
#> [29] "error" "are" "estimated" "to"
#> [33] "report" "a" "master’s" "degree"
#> [37] "with" "probability" "67" "and"
#> [41] "a" "professional" "degree" "with"
#> [45] "probability" "30" "other" "large"
#> [49] "differences" "exist" "for" "professional"
#> [53] "degree" "holders" "females" "with"
#> [57] "professional" "degrees" "who" "make"
#> [61] "a" "reporting" "error" "are"
#> [65] "most" "likely" "to" "report"
#> [69] "a" "bachelor’s" "degree" "whereas"
#> [73] "men" "with" "professional" "degrees"
#> [77] "who" "make" "a" "reporting"
#> [81] "error" "are" "most" "likely"
#> [85] "to" "report" "a" "master’s"
#> [89] "degree" "or" "ph" "d"
#> [93] "we" "note" "that" "some"
#> [97] "of" "the" "estimates" "for"
#> [101] "model" "4" "are" "based"
#> [105] "on" "small" "sample" "sizes"
#> [109] "which" "explains" "the" "wide"
#> [113] "standard" "errors" "turning" "to"
#> [117] "models" "5" "7" "we"
#> [121] "can" "see" "the" "impact"
#> [125] "of" "the" "informative" "prior"
#> [129] "distributions" "by" "comparing" "results"
#> [133] "in" "table" "4" "under"
#> [137] "these" "models" "to" "those"
#> [141] "for" "model" "4" "moving"
#> [145] "from" "model" "4" "to"
#> [149] "model" "5" "the" "most"
#> [153] "noticeable" "differences" "are" "for"
#> [157] "women" "with" "a" "ph"
#> [161] "d" "and" "men" "with"
#> [165] "a" "master’s" "degree" "for"
#> [169] "whom" "model" "5" "suggests"
#> [173] "lower" "error" "rates" "these"
#> [177] "groups" "have" "smaller" "sample"
#> [181] "sizes" "so" "that" "the"
#> [185] "data" "do" "not" "swamp"
#> [189] "the" "effects" "of" "the"
#> [193] "prior" "distribution" "when" "making"
#> [197] "the" "prior" "sample" "sizes"
#> [201] "very" "large" "as" "in"
#> [205] "models" "6" "and" "7"
#> [209] "the" "information" "in" "the"
#> [213] "prior" "distribution" "tends" "to"
#> [217] "overwhelm" "the" "information" "in"
#> [221] "the" "data" "we" "provide"
#> [225] "more" "thorough" "investigation" "of"
#> [229] "the" "impact" "of" "the"
#> [233] "prior" "specifications" "in" "the"
#> [237] "supplementary" "material" "of" "course"
#> [241] "we" "cannot" "be" "certain"
#> [245] "which" "model" "most" "closely"
#> [249] "reflects" "the" "true" "measure"
#> [253] "ment" "error" "mechanism" "the"
#> [257] "best" "we" "can" "do"
#> [261] "is" "perform" "diagnostic" "tests"
#> [265] "to" "see" "which" "models"
#> [269] "if" "any" "should" "be"
#> [273] "discounted" "as" "not" "adequately"
#> [277] "describing" "the" "observed" "data"
#> [281] "m" "for" "each" "acs"
#> [285] "imputed" "dataset" "de" "under"
#> [289] "each" "model" "we" "compute"
#> [293] "the" "sample" "pro" "m"
#> [297] "portions" "π̂xk" "and" "corresponding"
#> [301] "multiple" "imputation" "95" "confidence"
#> [305] "intervals" "for" "all" "165̇"
#> [309] "unique" "values" "of" "x"
#> [313] "y" "we" "determine" "how"
#> [317] "many" "of" "the" "80"
#> [321] "estimated" "population" "percentages" "of"
#> [325] "y" "x" "computed" "from"
#> [329] "the" "2010" "nscg" "using"
#> [333] "the" "estimated" "t̂x" "from"
#> [337] "the" "acs" "to" "back"
#> [341] "into" "an" "estimate" "of"
#> [345] "t̂x5" "fall" "within" "the"
#> [349] "multiple" "imputation" "95" "21"
#>
#>
#> [[22]]
#> [[22]][[1]]
#> [1] "confidence" "intervals" "models" "that"
#> [5] "yield" "low" "rates" "do"
#> [9] "not" "describe" "the" "data"
#> [13] "accurately" "for" "model" "1"
#> [17] "73" "of" "80" "nscg"
#> [21] "population" "share" "estimates" "are"
#> [25] "contained" "in" "the" "acs"
#> [29] "multiple" "imputation" "intervals" "corresponding"
#> [33] "counts" "are" "75" "for"
#> [37] "model" "2" "71" "for"
#> [41] "model" "3" "and" "76"
#> [45] "for" "model" "4" "these"
#> [49] "results" "suggest" "that" "model"
#> [53] "1" "and" "model" "3"
#> [57] "may" "be" "inferior" "to"
#> [61] "model" "2" "and" "model"
#> [65] "4" "for" "the" "models"
#> [69] "with" "informative" "prior" "distributions"
#> [73] "the" "counts" "are" "74"
#> [77] "for" "model" "5" "67"
#> [81] "for" "model" "6" "and"
#> [85] "54" "for" "model" "7"
#> [89] "although" "the" "prior" "beliefs"
#> [93] "in" "models" "6" "and"
#> [97] "7" "seem" "plausible" "at"
#> [101] "first" "glance" "the" "diagnostic"
#> [105] "suggests" "that" "they" "do"
#> [109] "not" "describe" "the" "2010"
#> [113] "data" "distributions" "as" "well"
#> [117] "as" "models" "4" "and"
#> [121] "5" "considering" "the" "results"
#> [125] "as" "well" "as" "the"
#> [129] "diagnostic" "check" "if" "we"
#> [133] "had" "to" "choose" "one"
#> [137] "model" "we" "would" "select"
#> [141] "model" "5" "it" "seems"
#> [145] "plausible" "that" "the" "probability"
#> [149] "of" "misreporting" "education" "as"
#> [153] "well" "as" "the" "reported"
#> [157] "value" "itself" "when" "errors"
#> [161] "are" "made" "depend" "on"
#> [165] "both" "sex" "and" "true"
#> [169] "education" "level" "additionally" "the"
#> [173] "prior" "distribution" "from" "the"
#> [177] "1993" "linked" "data" "pulls"
#> [181] "estimates" "in" "groups" "with"
#> [185] "little" "sample" "size" "to"
#> [189] "measurement" "error" "distributions" "that"
#> [193] "seem" "more" "plausible" "on"
#> [197] "face" "value" "however" "one"
#> [201] "need" "not" "use" "the"
#> [205] "data" "fusion" "framework" "for"
#> [209] "measurement" "error" "to" "select"
#> [213] "a" "single" "model" "rather"
#> [217] "one" "can" "use" "the"
#> [221] "framework" "to" "examine" "sensitivity"
#> [225] "of" "analyses" "to" "the"
#> [229] "different" "specifications" "4.3.2" "sensitivity"
#> [233] "analyses" "figure" "2" "displays"
#> [237] "the" "multiply" "imputed" "survey"
#> [241] "weighted" "inferences" "for" "the"
#> [245] "total" "number" "of" "women"
#> [249] "with" "science" "and" "engineering"
#> [253] "degrees" "computing" "using" "the"
#> [257] "acs" "specific" "indicator" "variable"
#> [261] "we" "show" "results" "for"
#> [265] "models" "4" "7" "the"
#> [269] "cia" "model" "and" "based"
#> [273] "on" "the" "acs" "data"
#> [277] "without" "any" "adjustment" "for"
#> [281] "misreporting" "education" "the" "confidence"
#> [285] "intervals" "for" "model" "4"
#> [289] "and" "model" "5" "overlap"
#> [293] "substantially" "suggesting" "not" "much"
#> [297] "practical" "difference" "in" "choosing"
#> [301] "among" "these" "models" "however"
#> [305] "both" "are" "noticeably" "different"
#> [309] "from" "the" "other" "models"
#> [313] "especially" "for" "the" "ph"
#> [317] "d" "and" "professional" "degrees"
#> [321] "as" "the" "prior" "distributions"
#> [325] "on" "the" "error" "rates"
#> [329] "get" "stronger" "the" "estimated"
#> [333] "counts" "increase" "towards" "22"
#>
#>
#> [[23]]
#> [[23]][[1]]
#> [1] "6" "x" "10" "bachelors"
#> [5] "degree" "6" "x" "10"
#> [9] "masters" "degree" "5.2" "2.6"
#> [13] "acs" "cia" "model" "model"
#> [17] "4" "5" "model" "5"
#> [21] "2.5" "estimated" "total" "no"
#> [25] "of" "sci" "and" "eng"
#> [29] "degrees" "estimated" "total" "no"
#> [33] "of" "sci" "and" "eng"
#> [37] "degrees" "model" "6" "model"
#> [41] "7" "4.8" "2.4" "awarded"
#> [45] "to" "women" "awarded" "to"
#> [49] "women" "4.6" "2.3" "4.4"
#> [53] "2.2" "4.2" "2.1" "4"
#> [57] "2" "acs" "cia" "m4"
#> [61] "m5" "m6" "m7" "acs"
#> [65] "cia" "m4" "m5" "m6"
#> [69] "m7" "model" "model" "5"
#> [73] "x" "10" "professional" "degree"
#> [77] "x" "10" "5" "phd"
#> [81] "degree" "7.5" "5" "7"
#> [85] "4.5" "estimated" "total" "no"
#> [89] "of" "sci" "and" "eng"
#> [93] "degrees" "6.5" "estimated" "total"
#> [97] "no" "of" "sci" "and"
#> [101] "eng" "degrees" "4" "6"
#> [105] "awarded" "to" "women" "awarded"
#> [109] "to" "women" "5.5" "3.5"
#> [113] "5" "3" "4.5" "2.5"
#> [117] "4" "2" "3.5" "3"
#> [121] "1.5" "acs" "cia" "m4"
#> [125] "m5" "m6" "m7" "acs"
#> [129] "cia" "m4" "m5" "m6"
#> [133] "m7" "model" "model" "figure"
#> [137] "2" "the" "estimated" "total"
#> [141] "number" "of" "science" "and"
#> [145] "engineering" "degrees" "awarded" "to"
#> [149] "women" "under" "each" "model"
#> [153] "we" "plot" "the" "mean"
#> [157] "and" "95" "confidence" "intervals"
#> [161] "note" "the" "difference" "in"
#> [165] "scale" "for" "each" "degree"
#> [169] "category" "the" "estimate" "using"
#> [173] "the" "acs" "reported" "education"
#> [177] "we" "note" "that" "using"
#> [181] "the" "acs" "reported" "education"
#> [185] "without" "adjustments" "results" "in"
#> [189] "substantially" "higher" "estimated" "totals"
#> [193] "at" "the" "professional" "and"
#> [197] "ph" "d" "levels" "than"
#> [201] "any" "of" "the" "models"
#> [205] "that" "account" "for" "measurement"
#> [209] "error" "we" "also" "note"
#> [213] "that" "the" "cia" "model"
#> [217] "yields" "considerably" "lower" "counts"
#> [221] "for" "all" "but" "bachelor’s"
#> [225] "degrees" "figure" "3" "displays"
#> [229] "inferences" "for" "the" "average"
#> [233] "income" "for" "different" "degrees"
#> [237] "for" "most" "degrees" "the"
#> [241] "point" "estimates" "for" "models"
#> [245] "4" "7" "are" "reasonably"
#> [249] "close" "with" "models" "4"
#> [253] "23"
#>
#>
#> [[24]]
#> [[24]][[1]]
#> [1] "and" "5" "again"
#> [4] "giving" "similar" "results"
#> [7] "the" "estimated" "average"
#> [10] "income" "for" "professionals"
#> [13] "differs" "noticeably" "across"
#> [16] "models" "with" "model"
#> [19] "4" "and" "model"
#> [22] "5" "suggesting" "lower"
#> [25] "averages" "than" "the"
#> [28] "unadjusted" "acs" "estimates"
#> [31] "or" "than" "models"
#> [34] "6" "and" "7"
#> [37] "we" "note" "that"
#> [40] "the" "cia" "model"
#> [43] "estimates" "are" "clearly"
#> [46] "implausible" "as" "an"
#> [49] "independent" "check" "on"
#> [52] "these" "estimates" "we"
#> [55] "considered" "the" "estimated"
#> [58] "average" "earnings" "in"
#> [61] "the" "2010" "current"
#> [64] "population" "survey" "they"
#> [67] "are" "83,720" "for"
#> [70] "professional" "80,600" "for"
#> [73] "ph" "d" "degree"
#> [76] "66,144" "for" "master’s"
#> [79] "degree" "and" "53,976"
#> [82] "for" "bachelor’s" "degree"
#> [85] "http" "www.collegequest.com" "bls"
#> [88] "research" "education" "pays"
#> [91] "2010" "aspx" "these"
#> [94] "line" "up" "more"
#> [97] "closely" "with" "the"
#> [100] "estimates" "from" "model"
#> [103] "5" "than" "any"
#> [106] "other" "model" "especially"
#> [109] "for" "the" "professional"
#> [112] "degree" "category" "where"
#> [115] "the" "estimates" "most"
#> [118] "differ" "figure" "4"
#> [121] "displays" "inferences" "for"
#> [124] "the" "average" "income"
#> [127] "for" "men" "and"
#> [130] "women" "all" "models"
#> [133] "support" "the" "conclusion"
#> [136] "that" "men" "make"
#> [139] "more" "than" "women"
#> [142] "apparently" "misreporting" "in"
#> [145] "education" "does" "not"
#> [148] "account" "for" "that"
#> [151] "gap" "at" "least"
#> [154] "for" "the" "models"
#> [157] "considered" "here" "we"
#> [160] "note" "that" "model"
#> [163] "4" "suggests" "potentially"
#> [166] "larger" "income" "gaps"
#> [169] "between" "male" "and"
#> [172] "female" "ph" "d"
#> [175] "recipients" "than" "the"
#> [178] "other" "models" "5"
#> [181] "concluding" "remarks" "the"
#> [184] "framework" "presented" "in"
#> [187] "this" "article" "offers"
#> [190] "analysts" "tools" "for"
#> [193] "using" "the" "information"
#> [196] "in" "a" "high"
#> [199] "quality" "separate" "data"
#> [202] "source" "to" "adjust"
#> [205] "for" "measurement" "errors"
#> [208] "in" "the" "database"
#> [211] "of" "interest" "key"
#> [214] "to" "the" "framework"
#> [217] "is" "to" "replace"
#> [220] "conditional" "independence" "assumptions"
#> [223] "typically" "used" "in"
#> [226] "data" "fusion" "with"
#> [229] "carefully" "considered" "measurement"
#> [232] "error" "models" "this"
#> [235] "avoids" "sacrificing" "information"
#> [238] "and" "facilitates" "analysis"
#> [241] "of" "the" "sensitivity"
#> [244] "of" "conclusions" "to"
#> [247] "alternative" "measurement" "error"
#> [250] "specifications" "analysts" "can"
#> [253] "use" "diagnostic" "tests"
#> [256] "to" "rule" "out"
#> [259] "some" "measurement" "error"
#> [262] "models" "and" "perform"
#> [265] "sensibility" "tests" "on"
#> [268] "others" "to" "identify"
#> [271] "reasonable" "candidates" "24"
#>
#>
#> [[25]]
#> [[25]][[1]]
#> [1] "4" "x" "10" "10"
#> [5] "acs" "cia" "model" "model"
#> [9] "4" "9" "model" "5"
#> [13] "model" "6" "model" "7"
#> [17] "8" "estimated" "average" "income"
#> [21] "7" "6" "5" "4"
#> [25] "3" "ba" "ma" "prof"
#> [29] "phd" "none" "education" "level"
#> [33] "figure" "3" "multiple" "imputation"
#> [37] "point" "and" "95" "confidence"
#> [41] "interval" "estimates" "for" "the"
#> [45] "average" "income" "within" "each"
#> [49] "education" "level" "the" "acs"
#> [53] "estimate" "is" "the" "survey"
#> [57] "weighted" "estimate" "based" "on"
#> [61] "the" "reported" "education" "level"
#> [65] "in" "the" "2010" "acs"
#> [69] "besides" "survey" "sampling" "contexts"
#> [73] "like" "the" "one" "considered"
#> [77] "here" "involving" "the" "acs"
#> [81] "and" "nscg" "the" "framework"
#> [85] "offers" "potential" "approaches" "for"
#> [89] "dealing" "with" "possible" "mea"
#> [93] "surement" "errors" "in" "organic"
#> [97] "big" "data" "this" "is"
#> [101] "increasingly" "important" "as" "data"
#> [105] "stewards" "and" "analysts" "consider"
#> [109] "replacing" "or" "supplementing" "high"
#> [113] "quality" "but" "expensive" "surveys"
#> [117] "with" "inexpensive" "and" "large"
#> [121] "sample" "organic" "data" "often"
#> [125] "scant" "attention" "is" "paid"
#> [129] "to" "the" "potential" "impact"
#> [133] "of" "measurement" "errors" "on"
#> [137] "inferences" "from" "those" "data"
#> [141] "the" "framework" "could" "be"
#> [145] "used" "with" "high" "quality"
#> [149] "validated" "surveys" "as" "the"
#> [153] "gold" "standard" "data" "allowing"
#> [157] "for" "adjustments" "to" "the"
#> [161] "error" "prone" "organic" "data"
#> [165] "25"
#>
#>
#> [[26]]
#> [[26]][[1]]
#> [1] "4" "x"
#> [3] "10" "bachelors"
#> [5] "degree" "4"
#> [7] "x" "10"
#> [9] "masters" "degree"
#> [11] "7" "8"
#> [13] "male" "small"
#> [15] "marker" "female"
#> [17] "large" "marker"
#> [19] "6.5" "7.5"
#> [21] "7" "estimated"
#> [23] "average" "income"
#> [25] "by" "gender"
#> [27] "estimated" "average"
#> [29] "income" "by"
#> [31] "gender" "6"
#> [33] "6.5" "5.5"
#> [35] "6" "5"
#> [37] "5.5" "4.5"
#> [39] "5" "4"
#> [41] "4.5" "3.5"
#> [43] "4" "3"
#> [45] "3.5" "acs"
#> [47] "cia" "m4"
#> [49] "m5" "m6"
#> [51] "m7" "acs"
#> [53] "cia" "m4"
#> [55] "m5" "m6"
#> [57] "m7" "model"
#> [59] "specification" "model"
#> [61] "specification" "4"
#> [63] "x" "10"
#> [65] "professional" "degree"
#> [67] "x" "10"
#> [69] "4" "phd"
#> [71] "degree" "12"
#> [73] "10" "11"
#> [75] "9" "10"
#> [77] "estimated" "average"
#> [79] "income" "by"
#> [81] "gender" "estimated"
#> [83] "average" "income"
#> [85] "by" "gender"
#> [87] "8" "9"
#> [89] "8" "7"
#> [91] "7" "6"
#> [93] "6" "5"
#> [95] "5" "4"
#> [97] "4" "3"
#> [99] "3" "acs"
#> [101] "cia" "m4"
#> [103] "m5" "m6"
#> [105] "m7" "acs"
#> [107] "cia" "m4"
#> [109] "m5" "m6"
#> [111] "m7" "model"
#> [113] "specification" "model"
#> [115] "specification" "figure"
#> [117] "4" "multiple"
#> [119] "imputation" "point"
#> [121] "and" "95"
#> [123] "confidence" "interval"
#> [125] "estimates" "for"
#> [127] "the" "average"
#> [129] "income" "for"
#> [131] "men" "and"
#> [133] "women" "within"
#> [135] "each" "education"
#> [137] "level" "the"
#> [139] "acs" "estimate"
#> [141] "is" "the"
#> [143] "survey" "weighted"
#> [145] "estimate" "based"
#> [147] "on" "the"
#> [149] "reported" "education"
#> [151] "level" "in"
#> [153] "the" "2010"
#> [155] "acs" "supplementary"
#> [157] "materials" "all"
#> [159] "supplemental" "files"
#> [161] "listed" "below"
#> [163] "are" "contained"
#> [165] "in" "a"
#> [167] "single" "zip"
#> [169] "file" "supplementary.zip"
#> [171] "and" "can"
#> [173] "be" "obtained"
#> [175] "via" "a"
#> [177] "single" "download"
#> [179] "supplementary" "results"
#> [181] "supplementary" "details"
#> [183] "and" "additional"
#> [185] "results" "for"
#> [187] "paper" "supp"
#> [189] "material" "final.pdf"
#> [191] "acs" "data"
#> [193] "2010" "acs"
#> [195] "data" "used"
#> [197] "in" "the"
#> [199] "paper" "acsdata"
#> [201] "2010standardized.csv.zip" "26"
#>
#>
#> [[27]]
#> [[27]][[1]]
#> [1] "matlab" "code" "matlab"
#> [4] "files" "containing" "main"
#> [7] "code" "maincode" "edu"
#> [10] "2010app" "report1993" "m"
#> [13] "and" "helper" "functions"
#> [16] "design.m" "and" "dirsamp.m"
#> [19] "as" "well" "as"
#> [22] "parameter" "files" "mu.mat"
#> [25] "and" "tauspd.mat" "code.zip"
#> [28] "prior" "distributions" "csv"
#> [31] "files" "are" "provided"
#> [34] "for" "priors" "used"
#> [37] "in" "model" "5"
#> [40] "and" "read" "in"
#> [43] "by" "main" "matlab"
#> [46] "code" "referred" "to"
#> [49] "as" "femalereportprior1993" "csv"
#> [52] "malereport" "prior1993" "csv"
#> [55] "betareportprior.csv" "priors.zip" "references"
#> [58] "abayomi" "k" "gelman"
#> [61] "a" "and" "levy"
#> [64] "m" "2008" "diagnostics"
#> [67] "for" "multivariate" "impu"
#> [70] "tations" "journal" "of"
#> [73] "the" "royal" "statistical"
#> [76] "society" "series" "c"
#> [79] "applied" "statistics" "57"
#> [82] "273" "291" "black"
#> [85] "d" "haviland" "a"
#> [88] "sanders" "s" "and"
#> [91] "taylor" "l" "2006"
#> [94] "why" "do" "minority"
#> [97] "men" "earn" "less"
#> [100] "a" "study" "of"
#> [103] "wage" "differentials" "among"
#> [106] "the" "highly" "educated"
#> [109] "the" "review" "of"
#> [112] "economics" "and" "statistics"
#> [115] "88" "300" "313"
#> [118] "black" "d" "sanders"
#> [121] "s" "and" "taylor"
#> [124] "l" "2003" "measurement"
#> [127] "of" "higher" "education"
#> [130] "in" "the" "census"
#> [133] "and" "current" "population"
#> [136] "survey" "journal" "of"
#> [139] "the" "american" "statistical"
#> [142] "association" "98" "545"
#> [145] "554" "black" "d"
#> [148] "a" "haviland" "a"
#> [151] "m" "sanders" "s"
#> [154] "g" "and" "taylor"
#> [157] "l" "j" "2008"
#> [160] "gender" "wage" "disparities"
#> [163] "among" "the" "highly"
#> [166] "educated" "journal" "of"
#> [169] "human" "resources" "43"
#> [172] "630" "659" "carrig"
#> [175] "m" "manrique" "vallier"
#> [178] "d" "ranby" "k"
#> [181] "reiter" "j" "p"
#> [184] "and" "hoyle" "r"
#> [187] "2015" "a" "multiple"
#> [190] "imputation" "based" "method"
#> [193] "for" "the" "retrospective"
#> [196] "harmonization" "of" "data"
#> [199] "sets" "multivariate" "behavioral"
#> [202] "research" "50" "383"
#> [205] "397" "curran" "p"
#> [208] "j" "and" "hussong"
#> [211] "a" "m" "2009"
#> [214] "integrative" "data" "analysis"
#> [217] "the" "simultaneous" "analysis"
#> [220] "of" "multiple" "data"
#> [223] "sets" "psychological" "methods"
#> [226] "14" "81" "100"
#> [229] "d’orazio" "m" "di"
#> [232] "zio" "m" "and"
#> [235] "scanu" "m" "2006"
#> [238] "statistical" "matching" "theory"
#> [241] "and" "practice" "hoboken"
#> [244] "nj" "wiley" "dunson"
#> [247] "d" "b" "and"
#> [250] "xing" "c" "2009"
#> [253] "nonparametric" "bayes" "modeling"
#> [256] "of" "multivariate" "categorical"
#> [259] "data" "journal" "of"
#> [262] "the" "american" "statistical"
#> [265] "association" "104" "1042"
#> [268] "1051" "fesco" "r"
#> [271] "s" "frase" "m"
#> [274] "j" "and" "kannankutty"
#> [277] "n" "2012" "using"
#> [280] "the" "american" "commu"
#> [283] "nity" "survey" "as"
#> [286] "the" "sampling" "frame"
#> [289] "for" "the" "national"
#> [292] "survey" "of" "college"
#> [295] "graduates" "working" "paper"
#> [298] "ncses" "12" "201"
#> [301] "national" "science" "foundation"
#> [304] "national" "center" "for"
#> [307] "science" "and" "engineering"
#> [310] "statistics" "arlington" "va"
#> [313] "27"
#>
#>
#> [[28]]
#> [[28]][[1]]
#> [1] "finamore" "j" "2013" "national"
#> [5] "survey" "of" "college" "graduates"
#> [9] "about" "the" "survey" "na"
#> [13] "tional" "center" "for" "science"
#> [17] "and" "engineering" "statistics" "fosdick"
#> [21] "b" "k" "deyoreo" "m"
#> [25] "and" "reiter" "j" "p"
#> [29] "2016" "categorical" "data" "fusion"
#> [33] "using" "auxiliary" "information" "annals"
#> [37] "of" "applied" "statistics" "to"
#> [41] "appear" "he" "y" "landrum"
#> [45] "m" "b" "and" "zaslavksy"
#> [49] "a" "m" "2014" "combining"
#> [53] "information" "from" "two" "data"
#> [57] "sources" "with" "misreporting" "and"
#> [61] "incompleteness" "to" "assess" "hospice"
#> [65] "use" "among" "cancer" "patients"
#> [69] "a" "multiple" "imputation" "appraoch"
#> [73] "statistics" "in" "medicine" "33"
#> [77] "3710" "3724" "hirano" "k"
#> [81] "imbens" "g" "ridder" "g"
#> [85] "and" "rubin" "d" "2001"
#> [89] "combining" "panel" "data" "sets"
#> [93] "with" "attrition" "and" "refreshment"
#> [97] "samples" "econometrica" "69" "1645"
#> [101] "1659" "kim" "h" "j"
#> [105] "cox" "l" "h" "karr"
#> [109] "a" "f" "reiter" "j"
#> [113] "p" "and" "wang" "q"
#> [117] "2015" "simultane" "ous" "edit"
#> [121] "imputation" "for" "continuous" "microdata"
#> [125] "journal" "of" "the" "american"
#> [129] "statistical" "association" "110" "987"
#> [133] "999" "lohr" "s" "l"
#> [137] "2010" "sampling" "design" "and"
#> [141] "analysis" "boston" "ma" "brooks"
#> [145] "cole" "2nd" "ed" "manrique"
#> [149] "vallier" "d" "and" "reiter"
#> [153] "j" "p" "2016" "bayesian"
#> [157] "simultaneous" "edit" "and" "impu"
#> [161] "tation" "for" "multivariate" "categorical"
#> [165] "data" "journal" "of" "the"
#> [169] "american" "statistical" "asso" "ciation"
#> [173] "to" "appear" "moriarity" "c"
#> [177] "and" "scheuren" "f" "2001"
#> [181] "statistical" "matching" "a" "paradigm"
#> [185] "for" "assessing" "the" "uncertainty"
#> [189] "in" "the" "procedure" "journal"
#> [193] "of" "official" "statistics" "17"
#> [197] "407" "422" "national" "science"
#> [201] "foundation" "1993" "national" "survey"
#> [205] "of" "college" "graduates" "1993"
#> [209] "http" "doi.org" "10.3886" "icpsr06880"
#> [213] "v1" "icpsr06880" "v1" "ann"
#> [217] "arbor" "mi" "inter" "university"
#> [221] "consortium" "for" "political" "and"
#> [225] "social" "research" "distributor" "2014"
#> [229] "10" "02" "pepe" "m"
#> [233] "s" "1992" "inference" "using"
#> [237] "surrogate" "outcome" "data" "and"
#> [241] "a" "validation" "sample" "biometrika"
#> [245] "79" "355" "365" "raghunathan"
#> [249] "t" "e" "2006" "combining"
#> [253] "information" "from" "multiple" "surveys"
#> [257] "for" "assess" "ing" "health"
#> [261] "disparities" "allgemeines" "statistisches" "archiv"
#> [265] "90" "515" "526" "rassler"
#> [269] "s" "2002" "statistical" "matching"
#> [273] "new" "york" "springer" "reiter"
#> [277] "j" "2008" "multiple" "imputation"
#> [281] "when" "records" "used" "for"
#> [285] "imputation" "are" "not" "used"
#> [289] "or" "disseminated" "for" "analysis"
#> [293] "biometrika" "95" "933" "946"
#> [297] "reiter" "j" "p" "2012"
#> [301] "bayesian" "finite" "population" "imputation"
#> [305] "for" "data" "fusion" "statistica"
#> [309] "sinica" "22" "795" "811"
#> [313] "rubin" "d" "b" "1986"
#> [317] "statistical" "matching" "using" "file"
#> [321] "concatenation" "with" "adjusted" "weights"
#> [325] "and" "multiple" "imputations" "journal"
#> [329] "of" "business" "economic" "statistics"
#> [333] "4" "87" "94" "28"
#>
#>
#> [[29]]
#> [[29]][[1]]
#> [1] "1987" "multiple" "imputation" "for"
#> [5] "nonresponse" "in" "surveys" "new"
#> [9] "york" "john" "wiley" "sons"
#> [13] "schenker" "n" "and" "raghunathan"
#> [17] "t" "e" "2007" "combining"
#> [21] "information" "from" "multiple" "surveys"
#> [25] "to" "enhance" "estimation" "of"
#> [29] "measures" "of" "health" "statistics"
#> [33] "in" "medicine" "26" "1802"
#> [37] "1811" "schenker" "n" "raghunathan"
#> [41] "t" "e" "and" "bondarenko"
#> [45] "i" "2010" "improving" "on"
#> [49] "analyses" "of" "self" "reported"
#> [53] "data" "in" "a" "large"
#> [57] "scale" "health" "survey" "by"
#> [61] "using" "information" "from" "an"
#> [65] "examination" "based" "survey" "statistics"
#> [69] "in" "medicine" "29" "533"
#> [73] "545" "schifeling" "t" "a"
#> [77] "cheng" "c" "reiter" "j"
#> [81] "p" "and" "hillygus" "d"
#> [85] "s" "2015" "accounting" "for"
#> [89] "nonignorable" "unit" "nonresponse" "and"
#> [93] "attrition" "in" "panel" "studies"
#> [97] "with" "refreshment" "samples" "journal"
#> [101] "of" "survey" "statistics" "and"
#> [105] "methodology" "3" "265" "295"
#> [109] "si" "y" "and" "reiter"
#> [113] "j" "2013" "nonparametric" "bayesian"
#> [117] "multiple" "imputation" "for" "incom"
#> [121] "plete" "categorical" "variables" "in"
#> [125] "large" "scale" "assessment" "surveys"
#> [129] "journal" "of" "educational" "and"
#> [133] "behavioral" "statistics" "38" "499"
#> [137] "521" "si" "y" "reiter"
#> [141] "j" "p" "and" "hillygus"
#> [145] "d" "s" "2015" "semi"
#> [149] "parametric" "selection" "models" "for"
#> [153] "potentially" "non" "ignorable" "attrition"
#> [157] "in" "panel" "studies" "with"
#> [161] "refreshment" "samples" "political" "analysis"
#> [165] "23" "92" "112" "siddique"
#> [169] "j" "reiter" "j" "p"
#> [173] "brincks" "a" "gibbons" "r"
#> [177] "d" "crespi" "c" "m"
#> [181] "and" "brown" "c" "h"
#> [185] "2015" "multiple" "imputation" "for"
#> [189] "harmonizing" "longitudinal" "non" "commensurate"
#> [193] "measures" "in" "individual" "participant"
#> [197] "data" "meta" "analysis" "statistics"
#> [201] "in" "medicine" "34" "3399"
#> [205] "3414" "tarmast" "g" "2001"
#> [209] "multivariate" "log" "normal" "distribution"
#> [213] "in" "international" "statistical" "institute"
#> [217] "seoul" "53rd" "session" "yucel"
#> [221] "r" "m" "and" "zaslavsky"
#> [225] "a" "m" "2005" "imputation"
#> [229] "of" "binary" "treatment" "variables"
#> [233] "with" "measurement" "error" "in"
#> [237] "administrative" "data" "journal" "of"
#> [241] "the" "american" "statistical" "association"
#> [245] "100" "1123" "1132" "29"
#>
#>
#> [[30]]
#> [[30]][[1]]
#> [1] "table" "4" "error" "rate"
#> [5] "estimates" "from" "different" "model"
#> [9] "specifications" "models" "1" "7"
#> [13] "are" "run" "for" "100,000"
#> [17] "mcmc" "iterations" "we" "save"
#> [21] "m" "50" "completed" "datasets"
#> [25] "under" "each" "model" "for"
#> [29] "each" "dataset" "we" "compute"
#> [33] "the" "estimated" "overall" "error"
#> [37] "rate" "estimated" "error" "rate"
#> [41] "by" "gender" "and" "imputed"
#> [45] "y" "and" "associated" "variances"
#> [49] "using" "ratio" "estimators" "that"
#> [53] "incorporate" "the" "acs" "final"
#> [57] "survey" "weights" "estimate" "estimate"
#> [61] "by" "group" "overall" "y"
#> [65] "ba" "y" "ma" "y"
#> [69] "prof" "y" "phd" "cia"
#> [73] "model" "male" "37" "36"
#> [77] "37" "76" "75" "76"
#> [81] "91" "91" "92" "94"
#> [85] "93" "95" "57" "55"
#> [89] "58" "female" "35" "35"
#> [93] "36" "72" "71" "72"
#> [97] "95" "94" "95" "97"
#> [101] "96" "97" "model" "1"
#> [105] "male" "05" "04" "06"
#> [109] "10" "08" "11" "18"
#> [113] "15" "21" "27" "23"
#> [117] "31" "17" "16" "19"
#> [121] "female" "05" "05" "06"
#> [125] "09" "08" "10" "18"
#> [129] "15" "21" "28" "24"
#> [133] "32" "model" "2" "male"
#> [137] "05" "04" "06" "18"
#> [141] "16" "21" "27" "18"
#> [145] "37" "36" "30" "42"
#> [149] "20" "18" "21" "female"
#> [153] "05" "05" "06" "12"
#> [157] "10" "14" "26" "20"
#> [161] "33" "41" "29" "53"
#> [165] "model" "3" "male" "05"
#> [169] "04" "06" "09" "08"
#> [173] "11" "17" "14" "20"
#> [177] "25" "21" "30" "17"
#> [181] "16" "19" "female" "05"
#> [185] "05" "06" "09" "08"
#> [189] "10" "17" "14" "20"
#> [193] "26" "21" "31" "model"
#> [197] "4" "male" "05" "04"
#> [201] "06" "19" "16" "23"
#> [205] "36" "26" "46" "36"
#> [209] "27" "45" "22" "20"
#> [213] "24" "female" "09" "08"
#> [217] "10" "14" "11" "17"
#> [221] "52" "44" "59" "55"
#> [225] "40" "70" "model" "5"
#> [229] "male" "07" "06" "08"
#> [233] "19" "16" "22" "23"
#> [237] "14" "32" "34" "27"
#> [241] "41" "22" "20" "24"
#> [245] "female" "09" "08" "10"
#> [249] "12" "09" "15" "50"
#> [253] "43" "57" "31" "17"
#> [257] "46" "model" "6" "male"
#> [261] "05" "05" "05" "09"
#> [265] "08" "10" "10" "09"
#> [269] "11" "10" "09" "11"
#> [273] "16" "14" "17" "female"
#> [277] "05" "04" "05" "06"
#> [281] "05" "07" "16" "14"
#> [285] "18" "07" "06" "09"
#> [289] "model" "7" "male" "01"
#> [293] "01" "01" "01" "00"
#> [297] "01" "00" "00" "01"
#> [301] "01" "00" "01" "11"
#> [305] "09" "13" "female" "01"
#> [309] "01" "01" "01" "01"
#> [313] "01" "01" "00" "01"
#> [317] "01" "00" "01" "30"
#>
#>
#> [[31]]
#> [[31]][[1]]
#> [1] "table" "5" "estimated" "mean"
#> [5] "and" "95" "confidence" "interval"
#> [9] "of" "reporting" "probabilities" "under"
#> [13] "model" "2" "and" "reporting"
#> [17] "probabilities" "by" "gender" "under"
#> [21] "model" "4" "z" "ba"
#> [25] "z" "ma" "z" "prof"
#> [29] "z" "phd" "y" "ba"
#> [33] "model" "2" "95" "87"
#> [37] "1.00" "04" "00" "11"
#> [41] "01" "00" "03" "model"
#> [45] "4" "male" "96" "90"
#> [49] "1.00" "02" "00" "07"
#> [53] "02" "00" "05" "model"
#> [57] "4" "female" "67" "58"
#> [61] "76" "30" "22" "38"
#> [65] "03" "00" "07" "y"
#> [69] "ma" "model" "2" "02"
#> [73] "00" "06" "51" "43"
#> [77] "59" "47" "39" "55"
#> [81] "model" "4" "male" "04"
#> [85] "00" "11" "57" "48"
#> [89] "66" "39" "31" "47"
#> [93] "model" "4" "female" "11"
#> [97] "00" "25" "39" "26"
#> [101] "52" "50" "40" "61"
#> [105] "y" "prof" "model" "2"
#> [109] "05" "00" "16" "69"
#> [113] "54" "83" "26" "14"
#> [117] "38" "model" "4" "male"
#> [121] "02" "00" "06" "69"
#> [125] "44" "94" "29" "04"
#> [129] "54" "model" "4" "female"
#> [133] "91" "79" "1.00" "06"
#> [137] "00" "16" "04" "00"
#> [141] "10" "y" "phd" "model"
#> [145] "2" "01" "00" "04"
#> [149] "39" "15" "63" "60"
#> [153] "36" "83" "model" "4"
#> [157] "male" "01" "00" "05"
#> [161] "21" "02" "39" "78"
#> [165] "60" "96" "model" "4"
#> [169] "female" "10" "00" "30"
#> [173] "77" "50" "1.00" "13"
#> [177] "00" "34" "y" "none"
#> [181] "model" "2" "95" "95"
#> [185] "96" "03" "03" "04"
#> [189] "01" "01" "01" "00"
#> [193] "00" "00" "model" "4"
#> [197] "male" "97" "96" "97"
#> [201] "03" "02" "03" "01"
#> [205] "00" "01" "00" "00"
#> [209] "00" "model" "4" "female"
#> [213] "96" "95" "97" "04"
#> [217] "03" "05" "00" "00"
#> [221] "00" "00" "00" "00"
#> [225] "31"
#>
#>