Tokenize the text of a pdf file into words.
convert_tokens(
x,
path = FALSE,
split_pdf = FALSE,
remove_hyphen = TRUE,
token_function = NULL
)
The text of the pdf file. This can be supplied directly as text, or as a file path, in which case the pdftools package is used to read the pdf file. To read from a file path, the path argument must be set to TRUE.
TRUE/FALSE indicating whether x is a file path to the pdf to be converted to text rather than the text itself. When TRUE, the pdftools package is used for this conversion. Default is FALSE.
TRUE/FALSE indicating whether to split the pdf using white space. This would be most useful with multicolumn pdf files. The split_pdf function attempts to recreate the column layout of the text into a single column starting with the left column and proceeding to the right.
TRUE/FALSE indicating whether hyphenated words should be adjusted to combine onto a single line. Default is TRUE.
A tokenizing function from the tokenizers package. When NULL (the default), the tokenize_words function is used.
A list of character vectors containing the tokens. More detail can be found by looking at the documentation of the tokenizers package.
file <- system.file('pdf', '1610.00147.pdf', package = 'pdfsearch')
convert_tokens(file, path = TRUE)
#> [[1]]
#> [[1]][[1]]
#> [1] "data" "fusion" "for" "correcting"
#> [5] "measurement" "errors" "tracy" "schifeling"
#> [9] "jerome" "p" "reiter" "maria"
#> [13] "deyoreo" "arxiv" "1610.00147v1" "stat"
#> [17] "me" "1" "oct" "2016"
#> [21] "abstract" "often" "in" "surveys"
#> [25] "key" "items" "are" "subject"
#> [29] "to" "measurement" "errors" "given"
#> [33] "just" "the" "data" "it"
#> [37] "can" "be" "difficult" "to"
#> [41] "determine" "the" "distribution" "of"
#> [45] "this" "error" "process" "and"
#> [49] "hence" "to" "obtain" "accurate"
#> [53] "inferences" "that" "involve" "the"
#> [57] "error" "prone" "variables" "in"
#> [61] "some" "settings" "however" "analysts"
#> [65] "have" "access" "to" "a"
#> [69] "data" "source" "on" "different"
#> [73] "in" "dividuals" "with" "high"
#> [77] "quality" "measurements" "of" "the"
#> [81] "error" "prone" "survey" "items"
#> [85] "we" "present" "a" "data"
#> [89] "fusion" "framework" "for" "leveraging"
#> [93] "this" "information" "to" "improve"
#> [97] "infer" "ences" "in" "the"
#> [101] "error" "prone" "survey" "the"
#> [105] "basic" "idea" "is" "to"
#> [109] "posit" "models" "about" "the"
#> [113] "rates" "at" "which" "individuals"
#> [117] "make" "errors" "coupled" "with"
#> [121] "models" "for" "the" "values"
#> [125] "reported" "when" "errors" "are"
#> [129] "made" "this" "can" "avoid"
#> [133] "the" "unrealistic" "assumption" "of"
#> [137] "conditional" "independence" "typically" "used"
#> [141] "in" "data" "fusion" "we"
#> [145] "apply" "the" "approach" "on"
#> [149] "the" "re" "ported" "values"
#> [153] "of" "educational" "attainments" "in"
#> [157] "the" "american" "community" "survey"
#> [161] "using" "the" "national" "survey"
#> [165] "of" "college" "graduates" "as"
#> [169] "the" "high" "quality" "data"
#> [173] "source" "in" "doing" "so"
#> [177] "we" "account" "for" "the"
#> [181] "informative" "sampling" "design" "used"
#> [185] "to" "select" "the" "national"
#> [189] "survey" "of" "college" "graduates"
#> [193] "we" "also" "present" "a"
#> [197] "process" "for" "assessing" "the"
#> [201] "sensitivity" "of" "various" "analyses"
#> [205] "to" "different" "choices" "for"
#> [209] "the" "measurement" "error" "models"
#> [213] "supplemental" "material" "is" "available"
#> [217] "online" "key" "words" "fusion"
#> [221] "imputation" "measurement" "error" "missing"
#> [225] "survey" "this" "research" "was"
#> [229] "supported" "by" "the" "national"
#> [233] "science" "foundation" "under" "award"
#> [237] "ses" "11" "31897" "the"
#> [241] "authors" "wish" "to" "thank"
#> [245] "seth" "sanders" "for" "his"
#> [249] "input" "on" "informative" "prior"
#> [253] "specifications" "and" "mauricio" "sadinle"
#> [257] "for" "discussion" "that" "improved"
#> [261] "the" "strategy" "for" "accounting"
#> [265] "for" "the" "informative" "sample"
#> [269] "design" "1"
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [1] "1" "introduction" "survey" "data"
#> [5] "often" "contain" "items" "that"
#> [9] "are" "subject" "to" "measurement"
#> [13] "errors" "for" "example" "some"
#> [17] "respondents" "might" "misunderstand" "a"
#> [21] "question" "or" "accidentally" "select"
#> [25] "the" "wrong" "response" "thereby"
#> [29] "providing" "values" "unequal" "to"
#> [33] "their" "factual" "values" "left"
#> [37] "uncorrected" "these" "measurement" "errors"
#> [41] "can" "result" "in" "degraded"
#> [45] "inferences" "kim" "et" "al"
#> [49] "2015" "unfor" "tunately" "the"
#> [53] "distribution" "of" "the" "measurement"
#> [57] "errors" "typically" "is" "not"
#> [61] "estimable" "from" "the" "survey"
#> [65] "data" "alone" "one" "either"
#> [69] "needs" "to" "make" "strong"
#> [73] "assumptions" "about" "the" "measure"
#> [77] "ment" "error" "process" "e"
#> [81] "g" "as" "in" "curran"
#> [85] "and" "hussong" "2009" "or"
#> [89] "leverage" "information" "from" "some"
#> [93] "other" "source" "of" "data"
#> [97] "as" "we" "do" "here"
#> [101] "one" "natural" "source" "of"
#> [105] "information" "is" "a" "validation"
#> [109] "sample" "i" "e" "a"
#> [113] "dataset" "with" "both" "the"
#> [117] "reported" "possibly" "erroneous" "values"
#> [121] "and" "the" "true" "values"
#> [125] "measured" "on" "the" "same"
#> [129] "individuals" "these" "individuals" "could"
#> [133] "be" "a" "subset" "of"
#> [137] "the" "original" "survey" "pepe"
#> [141] "1992" "yucel" "and" "zaslavsky"
#> [145] "2005" "or" "a" "completely"
#> [149] "distinct" "set" "raghunathan" "2006"
#> [153] "schenker" "and" "raghunathan" "2007"
#> [157] "schenker" "et" "al" "2010"
#> [161] "carrig" "et" "al" "2015"
#> [165] "with" "validation" "data" "one"
#> [169] "can" "model" "the" "relationship"
#> [173] "between" "the" "error" "prone"
#> [177] "and" "true" "values" "and"
#> [181] "use" "the" "model" "to"
#> [185] "replace" "the" "error" "prone"
#> [189] "items" "with" "multiply" "imputed"
#> [193] "plausible" "true" "values" "reiter"
#> [197] "2008" "siddique" "et" "al"
#> [201] "2015" "in" "many" "settings"
#> [205] "however" "it" "is" "not"
#> [209] "possible" "to" "obtain" "validation"
#> [213] "samples" "e" "g" "because"
#> [217] "it" "is" "too" "expensive"
#> [221] "or" "because" "someone" "other"
#> [225] "than" "the" "analyst" "collected"
#> [229] "the" "data" "in" "such"
#> [233] "cases" "another" "potential" "source"
#> [237] "of" "information" "is" "a"
#> [241] "separate" "gold" "stan" "dard"
#> [245] "dataset" "that" "includes" "true"
#> [249] "or" "at" "least" "very"
#> [253] "high" "quality" "measurements" "of"
#> [257] "the" "items" "subject" "to"
#> [261] "error" "but" "not" "the"
#> [265] "error" "prone" "measurements" "unlike"
#> [269] "validation" "sam" "ples" "the"
#> [273] "gold" "standard" "dataset" "alone"
#> [277] "does" "not" "provide" "enough"
#> [281] "information" "to" "estimate" "the"
#> [285] "relationship" "between" "the" "error"
#> [289] "prone" "and" "true" "values"
#> [293] "it" "only" "provides" "information"
#> [297] "about" "the" "distribution" "of"
#> [301] "the" "true" "values" "thus"
#> [305] "analysts" "are" "faced" "with"
#> [309] "a" "special" "case" "2"
#>
#>
#> [[3]]
#> [[3]][[1]]
#> [1] "of" "data" "fusion" "rubin"
#> [5] "1986" "moriarity" "and" "scheuren"
#> [9] "2001" "rassler" "2002" "d’orazio"
#> [13] "et" "al" "2006" "reiter"
#> [17] "2012" "fosdick" "et" "al"
#> [21] "2016" "i" "e" "integrating"
#> [25] "information" "from" "two" "databases"
#> [29] "with" "disjoint" "sets" "of"
#> [33] "individuals" "and" "distinct" "variables"
#> [37] "one" "default" "approach" "common"
#> [41] "in" "other" "data" "fusion"
#> [45] "contexts" "is" "to" "assume"
#> [49] "that" "the" "error" "prone"
#> [53] "and" "true" "values" "are"
#> [57] "conditionally" "independent" "given" "some"
#> [61] "set" "of" "variables" "x"
#> [65] "common" "to" "both" "the"
#> [69] "survey" "and" "gold" "standard"
#> [73] "data" "effectively" "this" "involves"
#> [77] "using" "the" "gold" "standard"
#> [81] "data" "to" "estimate" "a"
#> [85] "predictive" "model" "for" "the"
#> [89] "true" "values" "from" "x"
#> [93] "and" "applying" "the" "estimated"
#> [97] "model" "to" "impute" "replacements"
#> [101] "for" "all" "values" "of"
#> [105] "the" "error" "prone" "items"
#> [109] "in" "the" "survey" "however"
#> [113] "this" "conditional" "independence" "assumption"
#> [117] "completely" "disregards" "the" "information"
#> [121] "in" "the" "error" "prone"
#> [125] "values" "which" "sacrifices" "potentially"
#> [129] "useful" "information" "for" "example"
#> [133] "consider" "national" "surveys" "that"
#> [137] "ask" "people" "to" "report"
#> [141] "their" "educational" "attainment" "we"
#> [145] "might" "expect" "most" "people"
#> [149] "to" "report" "values" "accurately"
#> [153] "and" "only" "a" "modest"
#> [157] "fraction" "to" "make" "errors"
#> [161] "it" "does" "not" "make"
#> [165] "sense" "to" "alter" "every"
#> [169] "individual’s" "reported" "values" "in"
#> [173] "the" "survey" "as" "would"
#> [177] "be" "done" "using" "a"
#> [181] "conditional" "independence" "approach" "in"
#> [185] "this" "article" "we" "develop"
#> [189] "a" "framework" "for" "leveraging"
#> [193] "information" "from" "gold" "stan"
#> [197] "dard" "data" "to" "improve"
#> [201] "inferences" "in" "surveys" "subject"
#> [205] "to" "measurement" "errors" "the"
#> [209] "basic" "idea" "is" "to"
#> [213] "encode" "plausible" "assumptions" "about"
#> [217] "the" "error" "process" "e"
#> [221] "g" "most" "people" "do"
#> [225] "not" "make" "errors" "when"
#> [229] "reporting" "educational" "attainments" "and"
#> [233] "the" "reporting" "process" "e"
#> [237] "g" "when" "people" "make"
#> [241] "errors" "they" "are" "more"
#> [245] "likely" "to" "report" "higher"
#> [249] "attainments" "than" "actual" "into"
#> [253] "statistical" "models" "we" "couple"
#> [257] "those" "models" "with" "distributions"
#> [261] "for" "the" "un" "derlying"
#> [265] "true" "data" "values" "and"
#> [269] "use" "multiple" "imputation" "to"
#> [273] "create" "plausible" "corrections" "to"
#> [277] "the" "error" "prone" "survey"
#> [281] "values" "which" "then" "can"
#> [285] "be" "analyzed" "using" "the"
#> [289] "methods" "from" "ru" "bin"
#> [293] "1987" "this" "allows" "us"
#> [297] "to" "avoid" "unrealistic" "conditional"
#> [301] "independence" "assumptions" "in" "lieu"
#> [305] "of" "more" "scientifically" "defensible"
#> [309] "models" "the" "remainder" "of"
#> [313] "this" "article" "is" "organized"
#> [317] "as" "follows" "in" "section"
#> [321] "2" "we" "review" "an"
#> [325] "3"
#>
#>
#> [[4]]
#> [[4]][[1]]
#> [1] "example" "of" "misreporting" "of"
#> [5] "educational" "attainment" "in" "data"
#> [9] "collected" "by" "the" "census"
#> [13] "bureau" "so" "as" "to"
#> [17] "motivate" "the" "methodological" "developments"
#> [21] "in" "section" "3" "we"
#> [25] "intro" "duce" "the" "general"
#> [29] "framework" "for" "specifying" "measurement"
#> [33] "error" "models" "to" "leverage"
#> [37] "the" "information" "in" "gold"
#> [41] "standard" "data" "in" "section"
#> [45] "4" "we" "apply" "the"
#> [49] "framework" "to" "handle" "po"
#> [53] "tential" "measurement" "error" "in"
#> [57] "educational" "attainment" "in" "the"
#> [61] "2010" "american" "community" "survey"
#> [65] "acs" "using" "the" "2010"
#> [69] "national" "survey" "of" "college"
#> [73] "graduates" "nscg" "as" "a"
#> [77] "gold" "standard" "file" "in"
#> [81] "doing" "so" "we" "deal"
#> [85] "with" "a" "key" "complication"
#> [89] "in" "the" "data" "integration"
#> [93] "accounting" "for" "the" "informative"
#> [97] "sampling" "design" "used" "to"
#> [101] "sample" "the" "nscg" "we"
#> [105] "also" "demonstrate" "how" "the"
#> [109] "framework" "facilitates" "analysis" "of"
#> [113] "the" "sensitivity" "of" "conclusions"
#> [117] "to" "different" "measurement" "error"
#> [121] "model" "specifications" "in" "section"
#> [125] "5" "we" "provide" "a"
#> [129] "brief" "summary" "2" "misreporting"
#> [133] "in" "educational" "attainment" "to"
#> [137] "illustrate" "the" "potential" "for"
#> [141] "reporting" "errors" "in" "educational"
#> [145] "attainment" "that" "can" "arise"
#> [149] "in" "surveys" "we" "examine"
#> [153] "data" "from" "the" "1993"
#> [157] "nscg" "the" "1993" "nscg"
#> [161] "surveyed" "individuals" "who" "indicated"
#> [165] "on" "the" "1990" "census"
#> [169] "long" "form" "that" "they"
#> [173] "had" "at" "least" "a"
#> [177] "college" "degree" "fesco" "et"
#> [181] "al" "2012" "the" "questionnaire"
#> [185] "asked" "about" "educational" "attainment"
#> [189] "including" "detailed" "questions" "about"
#> [193] "educational" "histories" "these" "questions"
#> [197] "greatly" "reduce" "the" "possibility"
#> [201] "of" "respondent" "error" "so"
#> [205] "that" "the" "educational" "attainment"
#> [209] "values" "in" "the" "nscg"
#> [213] "can" "be" "considered" "a"
#> [217] "gold" "standard" "black" "et"
#> [221] "al" "2003" "the" "census"
#> [225] "long" "form" "in" "contrast"
#> [229] "did" "not" "include" "detailed"
#> [233] "follow" "up" "questions" "so"
#> [237] "that" "reported" "educational" "attainment"
#> [241] "is" "prone" "to" "measurement"
#> [245] "error" "the" "census" "bureau"
#> [249] "linked" "each" "individual" "in"
#> [253] "the" "nscg" "to" "their"
#> [257] "corresponding" "record" "in" "the"
#> [261] "long" "form" "data" "the"
#> [265] "linked" "file" "is" "available"
#> [269] "for" "download" "from" "the"
#> [273] "inter" "university" "consortium" "for"
#> [277] "political" "and" "social" "research"
#> [281] "national" "science" "foundation" "4"
#>
#>
#> [[5]]
#> [[5]][[1]]
#> [1] "table" "1" "unweighted" "cross" "tabulation"
#> [6] "of" "reported" "education" "in" "the"
#> [11] "nscg" "and" "census" "long" "form"
#> [16] "from" "the" "linked" "dataset" "ba"
#> [21] "stands" "for" "bachelor’s" "degree" "ma"
#> [26] "stands" "for" "master’s" "degree" "prof"
#> [31] "stands" "for" "professional" "degree" "and"
#> [36] "phd" "stands" "for" "ph" "d"
#> [41] "degree" "the" "14,319" "individuals" "in"
#> [46] "the" "group" "labeled" "no" "degree"
#> [51] "did" "not" "have" "a" "college"
#> [56] "degree" "despite" "reporting" "otherwise" "the"
#> [61] "51,396" "individuals" "in" "the" "group"
#> [66] "labeled" "other" "did" "not" "have"
#> [71] "one" "of" "ba" "ma" "prof"
#> [76] "phd" "and" "are" "discarded" "from"
#> [81] "subsequent" "analyses" "census" "reported" "education"
#> [86] "z" "ba" "ma" "prof" "phd"
#> [91] "total" "ba" "89580" "4109" "1241"
#> [96] "249" "95179" "nscg" "ma" "1218"
#> [101] "33928" "655" "526" "36327" "reported"
#> [106] "prof" "382" "359" "8648" "563"
#> [111] "9952" "education" "phd" "99" "193"
#> [116] "452" "6726" "7470" "total" "91279"
#> [121] "38589" "10996" "8064" "148928" "no"
#> [126] "degree" "10150" "1792" "2040" "337"
#> [131] "14319" "other" "33368" "10912" "4710"
#> [136] "2406" "51396" "1993" "because" "of"
#> [141] "the" "linkages" "we" "can" "characterize"
#> [146] "the" "actual" "measurement" "error" "mechanism"
#> [151] "for" "educational" "attainment" "in" "the"
#> [156] "1990" "long" "form" "data" "in"
#> [161] "the" "nscg" "we" "treat" "the"
#> [166] "highest" "degree" "of" "the" "three"
#> [171] "most" "recent" "degrees" "reported" "coded"
#> [176] "as" "ed6c1" "ed6c2" "and" "ed6c3"
#> [181] "in" "the" "file" "as" "the"
#> [186] "true" "education" "level" "we" "disregard"
#> [191] "any" "degrees" "earned" "in" "the"
#> [196] "years" "1990" "1993" "as" "these"
#> [201] "occur" "in" "the" "three" "year"
#> [206] "gap" "between" "collection" "of" "the"
#> [211] "long" "form" "and" "nscg" "data"
#> [216] "this" "ensures" "consistent" "time" "frames"
#> [221] "for" "the" "nscg" "and" "long"
#> [226] "form" "reported" "values" "we" "cross"
#> [231] "tabulate" "these" "degrees" "with" "the"
#> [236] "degrees" "reported" "in" "the" "long"
#> [241] "form" "data" "coded" "yearsch" "in"
#> [246] "the" "file" "table" "1" "displays"
#> [251] "the" "cross" "tabulation" "a" "similar"
#> [256] "analysis" "was" "done" "by" "black"
#> [261] "et" "al" "2003" "as" "evident"
#> [266] "in" "table" "1" "reported" "education"
#> [271] "levels" "on" "the" "long" "form"
#> [276] "often" "are" "higher" "than" "those"
#> [281] "on" "the" "nscg" "particularly" "for"
#> [286] "individuals" "with" "only" "a" "bachelor’s"
#> [291] "degree" "of" "the" "163,247" "individuals"
#> [296] "in" "scope" "in" "the" "nscg"
#> [301] "over" "14,000" "were" "determined" "not"
#> [306] "to" "have" "at" "least" "a"
#> [311] "bachelor’s" "degree" "when" "asked" "in"
#> [316] "the" "nscg" "despite" "reporting" "otherwise"
#> [321] "5"
#>
#>
#> [[6]]
#> [[6]][[1]]
#> [1] "in" "the" "long" "form"
#> [5] "a" "whopping" "33" "of"
#> [9] "individuals" "who" "reported" "being"
#> [13] "professionals" "in" "the" "long"
#> [17] "form" "actually" "are" "not"
#> [21] "professionals" "according" "to" "the"
#> [25] "nscg" "one" "possible" "explanation"
#> [29] "for" "this" "error" "is"
#> [33] "confusion" "over" "the" "definition"
#> [37] "of" "professionals" "the" "census"
#> [41] "bureau" "intended" "the" "category"
#> [45] "to" "capture" "graduate" "degrees"
#> [49] "from" "universities" "e" "g"
#> [53] "j" "d" "m" "b"
#> [57] "a" "m" "d" "whereas"
#> [61] "black" "et" "al" "2003"
#> [65] "found" "that" "individuals" "in"
#> [69] "professions" "such" "as" "cosmetology"
#> [73] "nursing" "and" "health" "services"
#> [77] "which" "require" "certifications" "but"
#> [81] "not" "graduate" "degrees" "selected"
#> [85] "the" "category" "in" "spite"
#> [89] "of" "the" "nontrivial" "reporting"
#> [93] "error" "the" "overwhelming" "majority"
#> [97] "of" "individuals" "reported" "education"
#> [101] "levels" "are" "consistent" "in"
#> [105] "the" "long" "form" "and"
#> [109] "in" "the" "nscg" "of"
#> [113] "the" "individuals" "in" "the"
#> [117] "nscg" "who" "had" "at"
#> [121] "least" "a" "college" "degree"
#> [125] "at" "the" "time" "of"
#> [129] "the" "1990" "census" "about"
#> [133] "93.3" "of" "them" "have"
#> [137] "the" "same" "contemporaneous" "education"
#> [141] "levels" "in" "both" "files"
#> [145] "this" "suggests" "that" "most"
#> [149] "people" "report" "correctly" "an"
#> [153] "observation" "we" "want" "to"
#> [157] "leverage" "when" "constructing" "measurement"
#> [161] "error" "models" "for" "education"
#> [165] "in" "the" "2010" "acs"
#> [169] "in" "most" "situations" "we"
#> [173] "do" "not" "have" "the"
#> [177] "good" "fortune" "of" "observing"
#> [181] "individuals" "error" "prone" "and"
#> [185] "true" "values" "simultaneously" "instead"
#> [189] "we" "are" "in" "the"
#> [193] "setting" "represented" "by" "figure"
#> [197] "1" "this" "is" "also"
#> [201] "the" "case" "in" "our"
#> [205] "analysis" "of" "educational" "attainments"
#> [209] "in" "the" "2010" "acs"
#> [213] "described" "in" "section" "4"
#> [217] "the" "sampling" "frame" "for"
#> [221] "the" "2010" "nscg" "is"
#> [225] "constructed" "from" "reported" "education"
#> [229] "levels" "in" "the" "acs"
#> [233] "which" "replaced" "the" "long"
#> [237] "form" "after" "the" "2000"
#> [241] "census" "however" "unlike" "in"
#> [245] "1993" "linked" "data" "are"
#> [249] "not" "available" "as" "public"
#> [253] "use" "files" "therefore" "we"
#> [257] "treat" "the" "2010" "nscg"
#> [261] "as" "gold" "standard" "data"
#> [265] "and" "posit" "measurement" "models"
#> [269] "that" "connect" "the" "information"
#> [273] "from" "the" "two" "data"
#> [277] "sources" "using" "the" "framework"
#> [281] "that" "we" "now" "describe"
#> [285] "6"
#>
#>
#> [[7]]
#> [[7]][[1]]
#> [1] "x" "y" "z" "de"
#> [5] "x" "x" "dg" "x"
#> [9] "x" "figure" "1" "graphical"
#> [13] "representation" "of" "data" "fusion"
#> [17] "set" "up" "in" "the"
#> [21] "survey" "data" "de" "we"
#> [25] "only" "observe" "the" "error"
#> [29] "prone" "measurement" "z" "but"
#> [33] "not" "the" "true" "value"
#> [37] "y" "in" "the" "gold"
#> [41] "standard" "data" "dg" "we"
#> [45] "only" "observe" "y" "but"
#> [49] "not" "z" "we" "observe"
#> [53] "variables" "x" "in" "both"
#> [57] "samples" "3" "measurement" "error"
#> [61] "modeling" "via" "data" "fusion"
#> [65] "as" "in" "figure" "1"
#> [69] "let" "de" "and" "dg"
#> [73] "be" "two" "data" "sources"
#> [77] "comprising" "distinct" "individuals" "with"
#> [81] "sample" "sizes" "ne" "and"
#> [85] "ng" "respectively" "for" "each"
#> [89] "individual" "i" "in" "dg"
#> [93] "or" "de" "let" "xi"
#> [97] "xi1" "xip" "be" "variables"
#> [101] "common" "to" "both" "surveys"
#> [105] "such" "as" "demographic" "variables"
#> [109] "we" "assume" "these" "variables"
#> [113] "have" "been" "harmonized" "d’orazio"
#> [117] "et" "al" "2006" "across"
#> [121] "dg" "and" "de" "and"
#> [125] "are" "free" "of" "errors"
#> [129] "let" "y" "represent" "the"
#> [133] "error" "free" "values" "of"
#> [137] "some" "variable" "of" "interest"
#> [141] "and" "let" "z" "be"
#> [145] "an" "error" "prone" "version"
#> [149] "of" "y" "we" "observe"
#> [153] "z" "but" "not" "y"
#> [157] "for" "the" "ne" "individuals"
#> [161] "in" "de" "we" "observe"
#> [165] "y" "but" "not" "z"
#> [169] "for" "the" "ng" "individuals"
#> [173] "in" "dg" "for" "simplicity"
#> [177] "of" "notation" "we" "assume"
#> [181] "no" "missing" "values" "in"
#> [185] "any" "variable" "although" "the"
#> [189] "multiple" "imputation" "framework" "easily"
#> [193] "handles" "missing" "values" "additionally"
#> [197] "de" "can" "include" "variables"
#> [201] "for" "which" "there" "is"
#> [205] "no" "corresponding" "variable" "in"
#> [209] "dg" "these" "variables" "do"
#> [213] "not" "play" "a" "role"
#> [217] "in" "the" "measurement" "error"
#> [221] "modeling" "although" "they" "can"
#> [225] "be" "used" "in" "multiple"
#> [229] "imputation" "inferences" "we" "seek"
#> [233] "to" "estimate" "pr" "y"
#> [237] "z" "x" "and" "use"
#> [241] "it" "to" "create" "multiple"
#> [245] "imputations" "for" "the" "missing"
#> [249] "values" "in" "y" "for"
#> [253] "the" "individuals" "in" "de"
#> [257] "we" "do" "so" "for"
#> [261] "the" "common" "setting" "where"
#> [265] "x" "y" "z" "are"
#> [269] "all" "categorical" "variables" "similar"
#> [273] "ideas" "apply" "for" "other"
#> [277] "data" "types" "for" "j"
#> [281] "1" "p" "let" "each"
#> [285] "xj" "have" "dj" "levels"
#> [289] "let" "z" "have" "dz"
#> [293] "levels" "and" "y" "have"
#> [297] "dy" "7"
#>
#>
#> [[8]]
#> [[8]][[1]]
#> [1] "levels" "typically" "dz" "dy"
#> [5] "but" "this" "need" "not"
#> [9] "be" "the" "case" "generally"
#> [13] "for" "example" "in" "the"
#> [17] "nscg" "acs" "application" "z"
#> [21] "is" "the" "educational" "attainment"
#> [25] "among" "those" "who" "report"
#> [29] "a" "college" "degree" "in"
#> [33] "the" "acs" "which" "has"
#> [37] "dz" "4" "levels" "bachelor’s"
#> [41] "degree" "master’s" "degree" "professional"
#> [45] "degree" "or" "ph" "d"
#> [49] "degree" "and" "y" "is"
#> [53] "the" "educational" "attainment" "in"
#> [57] "the" "nscg" "which" "has"
#> [61] "dy" "5" "levels" "an"
#> [65] "additional" "level" "is" "needed"
#> [69] "because" "some" "individuals" "in"
#> [73] "the" "nscg" "truly" "do"
#> [77] "not" "have" "a" "college"
#> [81] "degree" "for" "all" "i"
#> [85] "de" "let" "ei" "be"
#> [89] "an" "unobserved" "indicator" "of"
#> [93] "a" "reporting" "error" "that"
#> [97] "is" "ei" "1" "when"
#> [101] "yi" "6" "zi" "and"
#> [105] "ei" "0" "otherwise" "using"
#> [109] "e" "enables" "us" "to"
#> [113] "write" "pr" "y" "z"
#> [117] "x" "as" "a" "product"
#> [121] "of" "three" "sub" "models"
#> [125] "for" "individual" "i" "the"
#> [129] "full" "data" "likelihood" "omitting"
#> [133] "parameters" "for" "simplicity" "can"
#> [137] "be" "factored" "as" "pr"
#> [141] "yi" "k" "zi" "l"
#> [145] "xi" "pr" "yi" "k"
#> [149] "xi" "pr" "ei" "e"
#> [153] "yi" "k" "xi" "pr"
#> [157] "zi" "l" "ei" "e"
#> [161] "yi" "k" "xi" "1"
#> [165] "this" "separates" "the" "true"
#> [169] "data" "generation" "process" "and"
#> [173] "the" "measurement" "error" "generation"
#> [177] "process" "which" "facilitates" "model"
#> [181] "specification" "in" "particular" "we"
#> [185] "can" "use" "dg" "to"
#> [189] "estimate" "the" "true" "data"
#> [193] "distribution" "pr" "y" "x"
#> [197] "we" "then" "can" "posit"
#> [201] "different" "models" "for" "the"
#> [205] "rates" "of" "making" "errors"
#> [209] "pr" "ei" "e" "yi"
#> [213] "k" "xi" "and" "for"
#> [217] "the" "reported" "values" "when"
#> [221] "errors" "are" "made" "pr"
#> [225] "zi" "l" "ei" "1"
#> [229] "yi" "k" "xi" "intuitively"
#> [233] "the" "error" "model" "locates"
#> [237] "the" "records" "for" "which"
#> [241] "yi" "6" "zi" "and"
#> [245] "the" "reporting" "model" "captures"
#> [249] "the" "patterns" "of" "misreported"
#> [253] "zi" "of" "course" "when"
#> [257] "ei" "0" "pr" "zi"
#> [261] "yi" "1" "a" "similar"
#> [265] "factorization" "is" "used" "by"
#> [269] "yucel" "and" "zaslavsky" "2005"
#> [273] "he" "et" "al" "2014"
#> [277] "kim" "et" "al" "2015"
#> [281] "and" "manrique" "vallier" "and"
#> [285] "reiter" "2016" "among" "others"
#> [289] "by" "construction" "dg" "and"
#> [293] "de" "cannot" "be" "used"
#> [297] "to" "estimate" "any" "of"
#> [301] "the" "conditional" "probabilities" "pr"
#> [305] "y" "z" "x" "directly"
#> [309] "hence" "we" "have" "to"
#> [313] "restrict" "the" "number" "and"
#> [317] "types" "of" "parameters" "in"
#> [321] "the" "sub" "models" "in"
#> [325] "1" "put" "another" "way"
#> [329] "if" "we" "tried" "to"
#> [333] "estimate" "a" "fully" "8"
#>
#>
#> [[9]]
#> [[9]][[1]]
#> [1] "saturated" "model" "for" "e"
#> [5] "z" "x" "we" "would"
#> [9] "not" "be" "able" "to"
#> [13] "identify" "all" "the" "parameters"
#> [17] "by" "using" "dg" "and"
#> [21] "de" "alone" "to" "see"
#> [25] "this" "assume" "for" "the"
#> [29] "moment" "that" "all" "dx"
#> [33] "πpj" "1" "dj" "possible"
#> [37] "combinations" "of" "x" "are"
#> [41] "present" "in" "dg" "and"
#> [45] "de" "to" "estimate" "the"
#> [49] "distribution" "of" "e" "z"
#> [53] "x" "using" "a" "fully"
#> [57] "saturated" "model" "we" "require"
#> [61] "dy" "1" "dx" "dz"
#> [65] "1" "dy" "dx" "dy"
#> [69] "dz" "1" "dx" "independent"
#> [73] "pieces" "of" "information" "from"
#> [77] "dg" "de" "where" "each"
#> [81] "subtraction" "of" "one" "derives"
#> [85] "from" "the" "requirement" "that"
#> [89] "probabilities" "sum" "to" "one"
#> [93] "however" "dg" "and" "de"
#> [97] "together" "provide" "only" "dz"
#> [101] "1" "dx" "dy" "1"
#> [105] "dx" "dx" "dz" "dy"
#> [109] "1" "dx" "independent" "pieces"
#> [113] "of" "information" "where" "we"
#> [117] "add" "a" "dx" "to"
#> [121] "properly" "account" "for" "the"
#> [125] "sum" "to" "one" "constraint"
#> [129] "a" "key" "insight" "here"
#> [133] "is" "that" "since" "the"
#> [137] "true" "data" "model" "requires"
#> [141] "dy" "dx" "parameters" "to"
#> [145] "estimate" "the" "joint" "distribution"
#> [149] "for" "y" "x" "the"
#> [153] "data" "can" "identify" "at"
#> [157] "most" "dz" "1" "dx"
#> [161] "parameters" "in" "the" "error"
#> [165] "and" "reporting" "models" "combined"
#> [169] "related" "identification" "issues" "arise"
#> [173] "in" "the" "context" "of"
#> [177] "refreshment" "sampling" "to" "adjust"
#> [181] "for" "nonignorable" "attrition" "in"
#> [185] "longitudinal" "studies" "hirano" "et"
#> [189] "al" "2001" "schifeling" "et"
#> [193] "al" "2015" "si" "et"
#> [197] "al" "2015" "3.1" "true"
#> [201] "data" "model" "pr" "yi"
#> [205] "k" "xi" "one" "can"
#> [209] "use" "any" "model" "for"
#> [213] "y" "x" "that" "adequately"
#> [217] "describes" "the" "conditional" "distri"
#> [221] "bution" "such" "as" "a"
#> [225] "multinomial" "logistic" "regression" "in"
#> [229] "the" "nscg" "acs" "application"
#> [233] "we" "use" "a" "fully"
#> [237] "saturated" "multinomial" "model" "accounting"
#> [241] "for" "the" "informative" "sampling"
#> [245] "design" "in" "dg" "using"
#> [249] "the" "approach" "described" "in"
#> [253] "section" "4.1" "one" "also"
#> [257] "could" "use" "a" "joint"
#> [261] "distribution" "for" "y" "x"
#> [265] "such" "as" "a" "log"
#> [269] "linear" "model" "or" "a"
#> [273] "mixture" "of" "multinomials" "model"
#> [277] "dunson" "and" "xing" "2009"
#> [281] "si" "and" "reiter" "2013"
#> [285] "9"
#>
#>
#> [[10]]
#> [[10]][[1]]
#> [1] "3.2" "error" "model" "pr"
#> [5] "ei" "1" "yi" "xi"
#> [9] "in" "cases" "where" "dy"
#> [13] "dz" "a" "generic" "form"
#> [17] "for" "the" "error" "model"
#> [21] "is" "pr" "ei" "1"
#> [25] "xi" "yi" "k" "g"
#> [29] "xi" "yi" "β" "2"
#> [33] "where" "g" "xi" "yi"
#> [37] "β" "is" "some" "function"
#> [41] "of" "its" "arguments" "and"
#> [45] "β" "is" "some" "set"
#> [49] "of" "unknown" "parameters" "a"
#> [53] "convenient" "class" "of" "functions"
#> [57] "that" "we" "use" "here"
#> [61] "is" "the" "logistic" "regression"
#> [65] "of" "ei" "on" "some"
#> [69] "design" "vector" "mi" "derived"
#> [73] "from" "xi" "yi" "with"
#> [77] "corresponding" "coefficients" "β" "the"
#> [81] "analyst" "can" "encode" "different"
#> [85] "versions" "of" "mi" "to"
#> [89] "represent" "assumptions" "about" "the"
#> [93] "error" "process" "the" "simplest"
#> [97] "specification" "is" "to" "set"
#> [101] "each" "mi" "equal" "to"
#> [105] "a" "vector" "of" "ones"
#> [109] "which" "implies" "that" "there"
#> [113] "is" "a" "common" "probability"
#> [117] "of" "error" "for" "all"
#> [121] "individuals" "this" "error" "model"
#> [125] "makes" "sense" "when" "the"
#> [129] "analyst" "believes" "the" "errors"
#> [133] "in" "z" "occur" "completely"
#> [137] "at" "random" "for" "example"
#> [141] "when" "errors" "arise" "simply"
#> [145] "because" "respondents" "accidentally" "and"
#> [149] "randomly" "select" "the" "wrong"
#> [153] "response" "in" "the" "survey"
#> [157] "or" "when" "all" "respondents"
#> [161] "are" "equally" "likely" "to"
#> [165] "misunderstand" "the" "survey" "question"
#> [169] "a" "more" "realistic" "possibility"
#> [173] "is" "to" "allow" "the"
#> [177] "probability" "of" "error" "to"
#> [181] "depend" "on" "some" "variables"
#> [185] "in" "xi" "but" "not"
#> [189] "on" "yi" "e" "g"
#> [193] "men" "misreport" "education" "at"
#> [197] "different" "rates" "than" "women"
#> [201] "this" "could" "be" "encoded"
#> [205] "by" "including" "an" "intercept"
#> [209] "for" "one" "of" "the"
#> [213] "sexes" "in" "mi" "finally"
#> [217] "one" "can" "allow" "the"
#> [221] "probability" "of" "error" "to"
#> [225] "depend" "on" "yi" "itself"
#> [229] "for" "example" "people" "who"
#> [233] "truly" "do" "not" "have"
#> [237] "at" "least" "a" "college"
#> [241] "degree" "are" "more" "likely"
#> [245] "to" "misreport" "by" "including"
#> [249] "some" "function" "of" "it"
#> [253] "in" "mi" "in" "the"
#> [257] "case" "where" "dz" "6"
#> [261] "dy" "as" "in" "the"
#> [265] "nscg" "acs" "application" "we"
#> [269] "automatically" "set" "ei" "1"
#> [273] "for" "any" "individual" "with"
#> [277] "yi" "1" "dz" "for"
#> [281] "example" "we" "set" "ei"
#> [285] "1" "for" "all" "individuals"
#> [289] "who" "are" "determined" "in"
#> [293] "the" "nscg" "not" "to"
#> [297] "have" "a" "college" "degree"
#> [301] "but" "report" "so" "in"
#> [305] "the" "acs" "the" "stochastic"
#> [309] "part" "of" "the" "error"
#> [313] "model" "only" "applies" "to"
#> [317] "individuals" "who" "truly" "have"
#> [321] "at" "least" "a" "bachelor’s"
#> [325] "degree" "10"
#>
#>
#> [[11]]
#> [[11]][[1]]
#> [1] "3.3" "reporting" "model" "pr"
#> [5] "zi" "ei" "1" "yi"
#> [9] "xi" "when" "there" "is"
#> [13] "no" "reporting" "error" "for"
#> [17] "individual" "i" "i" "e"
#> [21] "ei" "0" "we" "know"
#> [25] "that" "zi" "yi" "when"
#> [29] "there" "is" "a" "reporting"
#> [33] "error" "we" "must" "model"
#> [37] "the" "reported" "value" "zi"
#> [41] "as" "with" "2" "one"
#> [45] "can" "posit" "a" "variety"
#> [49] "of" "distributions" "for" "the"
#> [53] "reporting" "error" "which" "is"
#> [57] "some" "function" "h" "xi"
#> [61] "yi" "α" "with" "parameters"
#> [65] "α" "we" "now" "describe"
#> [69] "a" "few" "reporting" "error"
#> [73] "models" "for" "illustration" "one"
#> [77] "could" "use" "more" "complicated"
#> [81] "models" "e" "g" "based"
#> [85] "on" "multinomial" "logistic" "regression"
#> [89] "as" "well" "a" "simple"
#> [93] "model" "assumes" "that" "values"
#> [97] "of" "zi" "are" "equally"
#> [101] "likely" "as" "in" "manrique"
#> [105] "vallier" "and" "reiter" "2016"
#> [109] "we" "have" "1" "dz"
#> [113] "1" "if" "l" "6"
#> [117] "k" "k" "1" "dz"
#> [121] "pr" "zi" "l" "xi"
#> [125] "yi" "k" "ei" "1"
#> [129] "1" "dz" "if" "k"
#> [133] "1" "dz" "3" "0"
#> [137] "otherwise" "such" "a" "reporting"
#> [141] "model" "could" "be" "reasonable"
#> [145] "when" "reporting" "errors" "are"
#> [149] "due" "to" "clerical" "errors"
#> [153] "we" "note" "that" "this"
#> [157] "model" "does" "not" "accurately"
#> [161] "characterize" "the" "reporting" "errors"
#> [165] "in" "the" "1993" "linked"
#> [169] "nscg" "data" "per" "table"
#> [173] "1" "alternatively" "one" "can"
#> [177] "allow" "the" "probabilities" "to"
#> [181] "depend" "on" "yi" "so"
#> [185] "that" "zi" "xi" "yi"
#> [189] "k" "ei" "1" "categorical"
#> [193] "pk" "1" "pk" "dz"
#> [197] "4" "where" "each" "pk"
#> [201] "l" "is" "the" "probability"
#> [205] "of" "reporting" "z" "l"
#> [209] "given" "that" "y" "k"
#> [213] "and" "pk" "k" "0"
#> [217] "one" "can" "further" "parameterize"
#> [221] "the" "reporting" "model" "so"
#> [225] "that" "the" "reporting" "probabilities"
#> [229] "vary" "with" "x" "for"
#> [233] "example" "to" "make" "the"
#> [237] "probabilities" "vary" "with" "sex"
#> [241] "and" "true" "education" "11"
#>
#>
#> [[12]]
#> [[12]][[1]]
#> [1] "values" "we" "can" "use"
#> [5] "categorical" "pm" "k" "1"
#> [9] "pm" "k" "dz" "if"
#> [13] "xi" "sex" "m" "zi"
#> [17] "xi" "yi" "k" "ei"
#> [21] "1" "5" "categorical" "p"
#> [25] "f" "k" "1" "pf"
#> [29] "k" "dz" "if" "xi"
#> [33] "sex" "f" "3.4" "specifying"
#> [37] "and" "estimating" "the" "model"
#> [41] "as" "apparent" "in" "sections"
#> [45] "3.2" "and" "3.3" "the"
#> [49] "error" "and" "reporting" "models"
#> [53] "can" "take" "on" "many"
#> [57] "specifications" "without" "linked" "data"
#> [61] "analysts" "cannot" "use" "exploratory"
#> [65] "data" "analysis" "to" "inform"
#> [69] "the" "model" "choice" "instead"
#> [73] "we" "recommend" "that" "analysts"
#> [77] "posit" "scientifically" "defensible" "measurement"
#> [81] "error" "models" "and" "make"
#> [85] "post" "hoc" "checks" "of"
#> [89] "the" "sensibility" "of" "analyses"
#> [93] "from" "those" "models" "we"
#> [97] "demonstrate" "this" "approach" "in"
#> [101] "section" "4" "for" "example"
#> [105] "analysts" "can" "check" "whether"
#> [109] "or" "not" "the" "predicted"
#> [113] "probabilities" "of" "errors" "implied"
#> [117] "by" "the" "model" "seem"
#> [121] "plausible" "as" "another" "diagnostic"
#> [125] "analysts" "can" "compare" "the"
#> [129] "distribution" "of" "the" "imputed"
#> [133] "values" "of" "y" "x"
#> [137] "in" "de" "to" "the"
#> [141] "empirical" "distribution" "of" "y"
#> [145] "x" "in" "dg" "this"
#> [149] "is" "akin" "to" "diagnostics"
#> [153] "in" "multiple" "imputation" "for"
#> [157] "missing" "data" "that" "compare"
#> [161] "imputed" "and" "observed" "values"
#> [165] "abayomi" "et" "al" "2008"
#> [169] "when" "these" "distributions" "differ"
#> [173] "substantially" "it" "suggests" "the"
#> [177] "measurement" "error" "model" "specification"
#> [181] "or" "possibly" "the" "true"
#> [185] "data" "model" "is" "inadequate"
#> [189] "such" "diagnostic" "checks" "only"
#> [193] "can" "reveal" "problems" "with"
#> [197] "the" "model" "specification" "they"
#> [201] "do" "not" "indicate" "that"
#> [205] "a" "particular" "specification" "is"
#> [209] "correct" "more" "generally" "it"
#> [213] "is" "prudent" "to" "keep"
#> [217] "the" "restrictions" "on" "the"
#> [221] "number" "of" "identifiable" "parameters"
#> [225] "in" "mind" "when" "specifying"
#> [229] "the" "models" "at" "most"
#> [233] "one" "can" "identify" "the"
#> [237] "equiv" "alent" "of" "dz"
#> [241] "1" "dx" "parameters" "in"
#> [245] "the" "combined" "model" "for"
#> [249] "ei" "zi" "xi" "generally"
#> [253] "for" "ease" "of" "specification"
#> [257] "and" "interpretation" "we" "favor"
#> [261] "rich" "error" "models" "e"
#> [265] "g" "with" "mi" "including"
#> [269] "variables" "in" "xi" "and"
#> [273] "yi" "coupled" "with" "simple"
#> [277] "reporting" "models" "like" "those"
#> [281] "in" "section" "3.3" "the"
#> [285] "exact" "strategy" "for" "estimating"
#> [289] "the" "model" "depends" "on"
#> [293] "the" "features" "of" "dg"
#> [297] "and" "de" "12"
#>
#>
#> [[13]]
#> [[13]][[1]]
#> [1] "when" "both" "datasets" "can"
#> [5] "be" "treated" "as" "simple"
#> [9] "random" "samples" "we" "suggest"
#> [13] "using" "a" "fully" "bayesian"
#> [17] "approach" "after" "concatenating" "dg"
#> [21] "and" "de" "here" "one"
#> [25] "can" "use" "typical" "prior"
#> [29] "distributions" "for" "the" "true"
#> [33] "data" "and" "error" "models"
#> [37] "for" "reporting" "models" "like"
#> [41] "those" "in" "4" "and"
#> [45] "5" "it" "is" "convenient"
#> [49] "to" "use" "independent" "dirichlet"
#> [53] "priors" "for" "each" "pk"
#> [57] "1" "pk" "k" "1"
#> [61] "pk" "k" "1" "pk"
#> [65] "dz" "in" "the" "nscg"
#> [69] "acs" "application" "we" "create"
#> [73] "prior" "distributions" "for" "the"
#> [77] "reporting" "models" "using" "the"
#> [81] "information" "from" "table" "1"
#> [85] "absent" "such" "information" "analysts"
#> [89] "can" "use" "uniform" "prior"
#> [93] "distributions" "when" "it" "does"
#> [97] "not" "make" "sense" "to"
#> [101] "concatenate" "dg" "and" "de"
#> [105] "it" "can" "be" "convenient"
#> [109] "to" "use" "a" "multi"
#> [113] "stage" "estimation" "strategy" "when"
#> [117] "imputing" "missing" "y" "in"
#> [121] "de" "all" "of" "the"
#> [125] "information" "needed" "from" "dg"
#> [129] "is" "represented" "by" "the"
#> [133] "parameters" "of" "the" "true"
#> [137] "data" "model" "θ" "hence"
#> [141] "we" "first" "can" "construct"
#> [145] "a" "possibly" "approximate" "posterior"
#> [149] "distribution" "of" "θ" "using"
#> [153] "only" "dg" "we" "then"
#> [157] "sample" "many" "draws" "from"
#> [161] "this" "distribution" "we" "plug"
#> [165] "these" "draws" "in" "the"
#> [169] "gibbs" "sampling" "steps" "for"
#> [173] "a" "bayesian" "predictive" "distribution"
#> [177] "for" "yi" "zi" "xi"
#> [181] "θ" "for" "the" "cases"
#> [185] "in" "de" "thereby" "generating"
#> [189] "the" "multiple" "imputations" "we"
#> [193] "describe" "the" "gibbs" "sampler"
#> [197] "for" "this" "step" "for"
#> [201] "the" "nscg" "acs" "application"
#> [205] "in" "the" "supplementary" "material"
#> [209] "4" "adjusting" "for" "reporting"
#> [213] "errors" "in" "education" "in"
#> [217] "the" "2010" "acs" "we"
#> [221] "now" "use" "the" "framework"
#> [225] "to" "adjust" "inferences" "for"
#> [229] "potential" "reporting" "error" "in"
#> [233] "educa" "tional" "attainment" "in"
#> [237] "the" "2010" "acs" "using"
#> [241] "the" "public" "use" "microdata"
#> [245] "for" "the" "2010" "nscg"
#> [249] "as" "the" "gold" "standard"
#> [253] "file" "dg" "we" "consider"
#> [257] "two" "main" "analyses" "that"
#> [261] "could" "be" "affected" "by"
#> [265] "reporting" "error" "in" "education"
#> [269] "first" "we" "estimate" "from"
#> [273] "the" "acs" "the" "number"
#> [277] "of" "science" "and" "engineering"
#> [281] "degrees" "awarded" "to" "women"
#> [285] "we" "base" "the" "estimate"
#> [289] "on" "an" "indicator" "in"
#> [293] "the" "acs" "for" "whether"
#> [297] "or" "not" "each" "individual"
#> [301] "has" "such" "a" "degree"
#> [305] "second" "we" "examine" "13"
#>
#>
#> [[14]]
#> [[14]][[1]]
#> [1] "average" "incomes" "across" "degrees"
#> [5] "this" "focus" "is" "motivated"
#> [9] "in" "part" "by" "the"
#> [13] "findings" "of" "black" "et"
#> [17] "al" "2006" "2008" "who"
#> [21] "found" "that" "apparent" "wage"
#> [25] "gaps" "in" "the" "1990"
#> [29] "census" "long" "form" "data"
#> [33] "could" "be" "explained" "by"
#> [37] "reporting" "errors" "in" "education"
#> [41] "as" "de" "we" "use"
#> [45] "the" "subset" "of" "acs"
#> [49] "microdata" "that" "includes" "only"
#> [53] "individuals" "who" "reported" "a"
#> [57] "bachelor’s" "degree" "or" "higher"
#> [61] "and" "are" "under" "age"
#> [65] "76" "the" "resulting" "sample"
#> [69] "size" "is" "ne" "600"
#> [73] "150" "in" "x" "we"
#> [77] "include" "gender" "age" "group"
#> [81] "24" "and" "younger" "25"
#> [85] "39" "40" "54" "and"
#> [89] "55" "and" "older" "and"
#> [93] "an" "indicator" "for" "whether"
#> [97] "the" "individual’s" "race" "is"
#> [101] "black" "or" "something" "else"
#> [105] "in" "the" "nscg" "we"
#> [109] "discarded" "38" "records" "with"
#> [113] "race" "suppressed" "leaving" "a"
#> [117] "sample" "size" "of" "ng"
#> [121] "77" "150" "we" "consider"
#> [125] "two" "sets" "of" "measurement"
#> [129] "error" "model" "specifications" "the"
#> [133] "first" "set" "uses" "specifications"
#> [137] "like" "those" "in" "section"
#> [141] "3" "with" "flat" "prior"
#> [145] "distributions" "for" "all" "parameters"
#> [149] "we" "use" "this" "set"
#> [153] "to" "illustrate" "model" "diagnostics"
#> [157] "and" "sensitivity" "analysis" "absent"
#> [161] "prior" "information" "about" "the"
#> [165] "measurement" "error" "process" "the"
#> [169] "second" "set" "uses" "a"
#> [173] "common" "error" "and" "reporting"
#> [177] "model" "with" "different" "informative"
#> [181] "prior" "distributions" "on" "its"
#> [185] "parameters" "we" "construct" "these"
#> [189] "informative" "prior" "distributions" "based"
#> [193] "on" "the" "analysis" "of"
#> [197] "the" "1993" "linked" "file"
#> [201] "for" "all" "specifications" "considered"
#> [205] "we" "create" "m" "50"
#> [209] "multiple" "imputations" "of" "the"
#> [213] "plausible" "true" "education" "values"
#> [217] "in" "the" "2010" "acs"
#> [221] "which" "we" "then" "analyze"
#> [225] "using" "the" "methods" "of"
#> [229] "rubin" "1987" "for" "all"
#> [233] "specifications" "the" "true" "data"
#> [237] "model" "is" "a" "saturated"
#> [241] "multinomial" "distribution" "for" "the"
#> [245] "five" "values" "of" "y"
#> [249] "for" "each" "combination" "of"
#> [253] "x" "we" "begin" "by"
#> [257] "describing" "how" "we" "estimate"
#> [261] "the" "parameters" "of" "the"
#> [265] "true" "data" "distribution" "accounting"
#> [269] "for" "the" "informative" "sampling"
#> [273] "design" "of" "the" "nscg"
#> [277] "4.1" "accounting" "for" "informative"
#> [281] "sampling" "design" "of" "nscg"
#> [285] "the" "2010" "nscg" "uses"
#> [289] "reported" "education" "in" "the"
#> [293] "2010" "acs" "as" "a"
#> [297] "stratification" "variable" "fesco" "et"
#> [301] "al" "2012" "finamore" "2013"
#> [305] "its" "unweighted" "percentages" "can"
#> [309] "over" "represent" "14"
#>
#>
#> [[15]]
#> [[15]][[1]]
#> [1] "or" "under" "represent" "degree" "types"
#> [6] "in" "the" "population" "this" "is"
#> [11] "most" "obviously" "the" "case" "for"
#> [16] "individuals" "without" "a" "college" "degree"
#> [21] "yi" "5" "we" "need" "to"
#> [26] "account" "for" "this" "informative" "sampling"
#> [31] "when" "estimating" "parameters" "of" "the"
#> [36] "true" "data" "model" "we" "do"
#> [41] "so" "with" "a" "two" "stage"
#> [46] "approach" "first" "we" "use" "survey"
#> [51] "weighted" "inferences" "to" "estimate" "population"
#> [56] "totals" "of" "y" "x" "from"
#> [61] "the" "2010" "nscg" "second" "we"
#> [66] "turn" "these" "estimates" "into" "an"
#> [71] "approximate" "bayesian" "posterior" "distribution" "for"
#> [76] "input" "to" "fitting" "the" "measurement"
#> [81] "error" "models" "used" "to" "impute"
#> [86] "plausible" "values" "of" "yi" "for"
#> [91] "individuals" "in" "the" "acs" "we"
#> [96] "now" "describe" "this" "process" "which"
#> [101] "can" "be" "used" "generally" "when"
#> [106] "dg" "is" "collected" "via" "a"
#> [111] "complex" "survey" "design" "suppose" "for"
#> [116] "the" "moment" "that" "dy" "dz"
#> [121] "this" "is" "not" "the" "case"
#> [126] "when" "de" "is" "the" "acs"
#> [131] "where" "dz" "4" "and" "dg"
#> [136] "is" "the" "nscg" "where" "dy"
#> [141] "5" "however" "we" "start" "here"
#> [146] "to" "fix" "ideas" "for" "all"
#> [151] "possible" "combinations" "x" "let" "θxk"
#> [156] "pr" "y" "k" "x" "x"
#> [161] "and" "let" "θx" "θx1" "θxdy"
#> [166] "we" "seek" "to" "use" "dg"
#> [171] "to" "specify" "f" "θ" "x"
#> [176] "y" "to" "do" "so" "we"
#> [181] "first" "parameterize" "θxk" "txk" "dj"
#> [186] "1" "py" "txj" "where" "txk"
#> [191] "is" "the" "population" "count" "of"
#> [196] "individuals" "with" "xi" "x" "yi"
#> [201] "k" "we" "estimate" "tx" "tx1"
#> [206] "txdy" "and" "the" "associated" "covariance"
#> [211] "matrix" "of" "the" "estimator" "using"
#> [216] "standard" "survey" "weighted" "estimation" "let"
#> [221] "wi" "be" "the" "sample" "weight"
#> [226] "for" "all" "i" "dg" "we"
#> [231] "compute" "the" "estimated" "total" "and"
#> [236] "associated" "variance" "for" "each" "x"
#> [241] "and" "k" "as" "ng" "x"
#> [246] "t̂xk" "wi" "i" "xi" "x"
#> [251] "yi" "k" "6" "i" "1"
#> [256] "ng" "2" "d" "t̂xk" "ng"
#> [261] "t̂xk" "x" "var" "wi" "i"
#> [266] "xi" "x" "yi" "k" "7"
#> [271] "ng" "1" "i" "1" "ng"
#> [276] "15"
#>
#>
#> [[16]]
#> [[16]][[1]]
#> [1] "for" "each" "k" "and" "l"
#> [6] "with" "l" "6" "k" "we"
#> [11] "also" "compute" "the" "estimated" "covariance"
#> [16] "ng" "ng" "x" "t̂xk" "cov"
#> [21] "t̂xk" "t̂xl" "d" "wi" "i"
#> [26] "xi" "x" "yi" "k" "ng"
#> [31] "1" "i" "1" "ng" "t̂xl"
#> [36] "wi" "i" "xi" "x" "yi"
#> [41] "l" "8" "ng" "the" "variance"
#> [46] "and" "covariance" "estimators" "are" "the"
#> [51] "design" "based" "estimators" "for" "probability"
#> [56] "proportional" "to" "size" "sampling" "with"
#> [61] "replacement" "as" "is" "typical" "of"
#> [66] "multi" "stage" "complex" "surveys" "lohr"
#> [71] "2010" "switching" "now" "to" "a"
#> [76] "bayesian" "modeling" "perspective" "we" "assume"
#> [81] "that" "tx" "log" "normal" "µx"
#> [86] "τx" "so" "as" "to" "ensure"
#> [91] "a" "distribution" "with" "positive" "values"
#> [96] "for" "all" "true" "totals" "we"
#> [101] "select" "µx" "τx" "so" "that"
#> [106] "each" "e" "txk" "t̂xk" "and"
#> [111] "var" "tx" "σ̂" "t̂x" "the"
#> [116] "estimated" "covariance" "matrix" "with" "elements"
#> [121] "defined" "by" "7" "and" "8"
#> [126] "these" "are" "derived" "from" "moment"
#> [131] "matching" "tarmast" "2001" "we" "have"
#> [136] "µxj" "log" "t̂xj" "τx" "j"
#> [141] "j" "2" "9" "2" "τx"
#> [146] "j" "j" "log" "1" "σ̂x"
#> [151] "j" "j" "t̂xj" "10" "τx"
#> [156] "j" "i" "log" "1" "σ̂x"
#> [161] "j" "i" "t̂xj" "t̂xi" "11"
#> [166] "where" "the" "notation" "j" "i"
#> [171] "denotes" "an" "element" "in" "row"
#> [176] "j" "and" "column" "i" "of"
#> [181] "the" "matrix" "we" "draw" "tx"
#> [186] "from" "this" "log" "normal" "distribution"
#> [191] "and" "transform" "to" "draws" "θx"
#> [196] "since" "the" "2010" "nscg" "does"
#> [201] "not" "include" "individuals" "who" "claim"
#> [206] "in" "the" "acs" "to" "have"
#> [211] "less" "than" "a" "bachelor’s" "degree"
#> [216] "we" "cannot" "use" "dg" "directly"
#> [221] "to" "estimate" "tx5" "instead" "we"
#> [226] "estimate" "tx" "tx1" "tx2" "tx3"
#> [231] "tx4" "tx5" "using" "the" "acs"
#> [236] "data" "and" "estimate" "tx1" "tx2"
#> [241] "tx3" "tx4" "from" "the" "nscg"
#> [246] "using" "the" "method" "described" "previously"
#> [251] "this" "leads" "to" "an" "estimate"
#> [256] "for" "tx5" "more" "precisely" "let"
#> [261] "the" "acs" "design" "based" "estimator"
#> [266] "for" "tx" "16"
#>
#>
#> [[17]]
#> [[17]][[1]]
#> [1] "table" "2" "summary" "of"
#> [5] "the" "first" "four" "measurement"
#> [9] "error" "model" "specifications" "for"
#> [13] "2010" "nscg" "acs" "analysis"
#> [17] "these" "models" "use" "flat"
#> [21] "prior" "distributions" "on" "all"
#> [25] "parameters" "error" "model" "reporting"
#> [29] "model" "expression" "for" "mit"
#> [33] "β" "p" "r" "zi"
#> [37] "yi" "k" "ei" "1"
#> [41] "p4" "model" "1" "β1"
#> [45] "k" "2" "βk" "i"
#> [49] "yi" "k" "categorical" "pk"
#> [53] "1" "pk" "4" "p4"
#> [57] "m" "model" "2" "β1"
#> [61] "k" "2" "βk" "i"
#> [65] "yi" "k" "xi" "sex"
#> [69] "m" "categorical" "pk" "1"
#> [73] "pk" "4" "p4" "no"
#> [77] "model" "3" "β1" "k"
#> [81] "2" "βk" "i" "yi"
#> [85] "k" "xi" "black" "no"
#> [89] "categorical" "pk" "1" "pk"
#> [93] "4" "p4" "yes" "k"
#> [97] "1" "βk" "i" "yi"
#> [101] "k" "xi" "black" "yes"
#> [105] "p4" "m" "model" "4"
#> [109] "β1" "k" "2" "βk"
#> [113] "i" "yi" "k" "xi"
#> [117] "sex" "m" "categorical" "pm"
#> [121] "k" "1" "pm" "k"
#> [125] "4" "if" "xi" "sex"
#> [129] "m" "p4" "f" "k"
#> [133] "1" "βk" "i" "yi"
#> [137] "k" "xi" "sex" "f"
#> [141] "categorical" "pf" "k" "1"
#> [145] "pf" "k" "4" "if"
#> [149] "xi" "sex" "f" "be"
#> [153] "t̂x" "with" "design" "based"
#> [157] "variance" "estimate" "σ̂" "2"
#> [161] "t̂x" "we" "sample" "a"
#> [165] "value" "tx" "normal" "t̂x"
#> [169] "σ̂" "2" "t̂x" "using"
#> [173] "an" "independent" "sample" "of"
#> [177] "values" "of" "tx1" "tx4"
#> [181] "from" "4j" "1" "txj"
#> [185] "and" "set" "tx" "tx1"
#> [189] "p" "the" "nscg" "we"
#> [193] "compute" "tx5" "tx" "tx5"
#> [197] "we" "repeat" "these" "steps"
#> [201] "10,000" "times" "we" "then"
#> [205] "compute" "the" "mean" "and"
#> [209] "covariance" "matrix" "of" "the"
#> [213] "10,000" "draws" "which" "we"
#> [217] "again" "plug" "into" "9"
#> [221] "11" "the" "resulting" "log"
#> [225] "normal" "distri" "bution" "is"
#> [229] "the" "approximate" "posterior" "distribution"
#> [233] "of" "θx" "we" "include"
#> [237] "an" "example" "of" "this"
#> [241] "entire" "procedure" "in" "the"
#> [245] "supplementary" "material" "4.2" "measurement"
#> [249] "error" "models" "the" "two"
#> [253] "sets" "of" "measurement" "error"
#> [257] "models" "include" "four" "that"
#> [261] "use" "flat" "prior" "distributions"
#> [265] "and" "three" "that" "use"
#> [269] "informative" "prior" "distributions" "based"
#> [273] "on" "the" "1993" "linked"
#> [277] "data" "for" "all" "error"
#> [281] "models" "we" "use" "a"
#> [285] "logistic" "regression" "of" "ei"
#> [289] "on" "various" "main" "effects"
#> [293] "and" "interactions" "of" "yi"
#> [297] "and" "xi" "for" "all"
#> [301] "reporting" "models" "we" "use"
#> [305] "categorical" "distributions" "with" "probabilities"
#> [309] "that" "depend" "on" "yi"
#> [313] "and" "possibly" "xi" "the"
#> [317] "four" "models" "with" "flat"
#> [321] "prior" "distributions" "are" "summarized"
#> [325] "in" "table" "2" "in"
#> [329] "model" "1" "the" "error"
#> [333] "and" "reporting" "models" "depend"
#> [337] "only" "on" "17"
#>
#>
#> [[18]]
#> [[18]][[1]]
#> [1] "table" "3" "summary" "of"
#> [5] "informative" "prior" "specifications" "for"
#> [9] "2010" "nscg" "acs" "analysis"
#> [13] "for" "males" "with" "bachelor’s"
#> [17] "degrees" "error" "rate" "reporting"
#> [21] "probabilities" "pm" "1" "2"
#> [25] "pm" "1" "3" "pm"
#> [29] "1" "4" "model" "4"
#> [33] "beta" "1" "1" "dirichlet"
#> [37] "1" "1" "1" "model"
#> [41] "5" "beta" "76" "14.24"
#> [45] "dirichlet" "3.54" "1.27" "0.19"
#> [49] "model" "6" "beta" "2724.2"
#> [53] "50862" "dirichlet" "2235.3" "799.7"
#> [57] "123.1" "model" "7" "beta"
#> [61] "500" "99500" "dirichlet" "1"
#> [65] "1" "1" "yi" "model"
#> [69] "2" "and" "3" "keep"
#> [73] "the" "reporting" "model" "as"
#> [77] "in" "4" "but" "expand"
#> [81] "the" "error" "model" "in"
#> [85] "model" "2" "the" "probability"
#> [89] "of" "a" "reporting" "error"
#> [93] "can" "vary" "with" "yi"
#> [97] "and" "sex" "xi" "sex"
#> [101] "in" "model" "3" "error"
#> [105] "probabilities" "can" "vary" "with"
#> [109] "yi" "and" "the" "indicator"
#> [113] "for" "black" "race" "xi"
#> [117] "black" "in" "model" "4"
#> [121] "the" "error" "and" "reporting"
#> [125] "models" "both" "depend" "on"
#> [129] "y" "and" "sex" "for"
#> [133] "models" "5" "7" "we"
#> [137] "use" "the" "specification" "in"
#> [141] "model" "4" "and" "incorporate"
#> [145] "prior" "in" "formation" "about"
#> [149] "the" "measurement" "errors" "from"
#> [153] "the" "1993" "linked" "data"
#> [157] "in" "constructing" "the" "priors"
#> [161] "we" "first" "remove" "records"
#> [165] "that" "have" "been" "flagged"
#> [169] "as" "having" "missing" "education"
#> [173] "that" "has" "been" "imputed"
#> [177] "because" "these" "imputations" "might"
#> [181] "not" "closely" "reflect" "the"
#> [185] "actual" "education" "values" "black"
#> [189] "et" "al" "2003" "table"
#> [193] "3" "displays" "the" "prior"
#> [197] "distributions" "for" "males" "with"
#> [201] "bachelor’s" "degrees" "details" "on"
#> [205] "how" "we" "arrive" "at"
#> [209] "these" "and" "other" "groups"
#> [213] "prior" "specifications" "are" "in"
#> [217] "the" "supplementary" "material" "here"
#> [221] "we" "summarize" "briefly" "x"
#> [225] "for" "model" "5" "we"
#> [229] "set" "the" "prior" "distributions"
#> [233] "for" "each" "βk" "so"
#> [237] "that" "the" "error" "rates"
#> [241] "are" "centered" "at" "the"
#> [245] "estimate" "from" "the" "1993"
#> [249] "linked" "data" "we" "also"
#> [253] "require" "the" "central" "95"
#> [257] "probability" "interval" "of" "the"
#> [261] "prior" "distribution" "on" "each"
#> [265] "error" "rate" "to" "be"
#> [269] "close" "to" "005" "20"
#> [273] "allowing" "for" "a" "wide"
#> [277] "but" "not" "unrealistic" "range"
#> [281] "of" "possible" "error" "rates"
#> [285] "for" "the" "reporting" "probabilities"
#> [289] "pm" "k" "z" "and"
#> [293] "pf" "k" "z" "we"
#> [297] "center" "most" "of" "the"
#> [301] "prior" "distributions" "at" "the"
#> [305] "corresponding" "estimates" "from" "the"
#> [309] "1993" "linked" "data" "we"
#> [313] "require" "the" "central" "95"
#> [317] "probability" "interval" "of" "each"
#> [321] "prior" "distribution" "to" "have"
#> [325] "support" "on" "values" "of"
#> [329] "p" "k" "z" "within"
#> [333] "10" "of" "the" "1993"
#> [337] "point" "estimate" "truncating" "at"
#> [341] "zero" "or" "one" "as"
#> [345] "needed" "one" "exception" "is"
#> [349] "18"
#>
#>
#> [[19]]
#> [[19]][[1]]
#> [1] "the" "reporting" "probabilities" "for"
#> [5] "those" "with" "no" "college"
#> [9] "degree" "who" "report" "professional"
#> [13] "degree" "which" "we" "center"
#> [17] "at" "half" "the" "1993"
#> [21] "estimate" "the" "census" "bureau"
#> [25] "has" "improved" "the" "clarity"
#> [29] "of" "the" "definition" "of"
#> [33] "professional" "in" "the" "20"
#> [37] "years" "since" "the" "1990"
#> [41] "long" "form" "as" "discussed"
#> [45] "in" "the" "prior" "specification"
#> [49] "section" "of" "the" "supplementary"
#> [53] "material" "for" "model" "6"
#> [57] "we" "use" "the" "same"
#> [61] "prior" "means" "as" "in"
#> [65] "model" "5" "for" "both"
#> [69] "error" "and" "re" "porting"
#> [73] "models" "however" "we" "substantially"
#> [77] "tighten" "the" "prior" "distributions"
#> [81] "to" "make" "the" "prior"
#> [85] "variance" "accord" "with" "the"
#> [89] "uncertainty" "in" "the" "point"
#> [93] "estimates" "from" "the" "1993"
#> [97] "linked" "data" "we" "do"
#> [101] "so" "by" "using" "prior"
#> [105] "sample" "sizes" "that" "match"
#> [109] "those" "from" "the" "1993"
#> [113] "nscg" "for" "example" "the"
#> [117] "1993" "nscg" "included" "53,586"
#> [121] "males" "with" "bachelor’s" "degrees"
#> [125] "excluding" "those" "records" "who"
#> [129] "had" "their" "census" "education"
#> [133] "imputed" "we" "therefore" "use"
#> [137] "beta" "2724.2" "50862" "as"
#> [141] "the" "prior" "distribution" "for"
#> [145] "the" "error" "rate" "for"
#> [149] "this" "x" "we" "similarly"
#> [153] "increase" "the" "prior" "sample"
#> [157] "sizes" "for" "the" "reporting"
#> [161] "probabilities" "to" "match" "the"
#> [165] "1993" "nscg" "sample" "sizes"
#> [169] "model" "7" "departs" "from"
#> [173] "the" "1993" "linked" "data"
#> [177] "estimates" "and" "encodes" "a"
#> [181] "strong" "prior" "belief" "that"
#> [185] "almost" "no" "one" "misreports"
#> [189] "their" "education" "except" "for"
#> [193] "haphazard" "mistakes" "here" "we"
#> [197] "set" "the" "prior" "mean"
#> [201] "for" "the" "probability" "of"
#> [205] "misreporting" "education" "to" "005"
#> [209] "for" "all" "demographic" "groups"
#> [213] "we" "use" "a" "prior"
#> [217] "sample" "size" "of" "100,000"
#> [221] "making" "the" "prior" "distribution"
#> [225] "concentrate" "strongly" "around" "005"
#> [229] "for" "the" "reporting" "probabilities"
#> [233] "we" "use" "a" "non"
#> [237] "informative" "prior" "distribution" "for"
#> [241] "convenience" "since" "the" "estimates"
#> [245] "of" "the" "reporting" "probabilities"
#> [249] "are" "strongly" "influenced" "by"
#> [253] "the" "concentrated" "prior" "distributions"
#> [257] "on" "the" "error" "rates"
#> [261] "finally" "for" "comparison" "purposes"
#> [265] "we" "also" "fit" "the"
#> [269] "model" "based" "on" "a"
#> [273] "conditional" "independence" "assumption" "cia"
#> [277] "to" "impute" "yi" "for"
#> [281] "individuals" "in" "the" "acs"
#> [285] "under" "the" "cia" "we"
#> [289] "sample" "θ" "and" "then"
#> [293] "impute" "y" "θ" "x"
#> [297] "from" "the" "true" "data"
#> [301] "model" "here" "we" "do"
#> [305] "not" "use" "the" "reported"
#> [309] "value" "of" "zi" "in"
#> [313] "the" "imputations" "19"
#>
#>
#> [[20]]
#> [[20]][[1]]
#> [1] "4.3" "empirical" "results" "we"
#> [5] "first" "examine" "what" "each"
#> [9] "model" "suggests" "about" "the"
#> [13] "extent" "and" "nature" "of"
#> [17] "the" "mea" "surement" "errors"
#> [21] "in" "the" "2010" "acs"
#> [25] "we" "then" "use" "the"
#> [29] "models" "to" "assess" "sensitivity"
#> [33] "of" "results" "about" "the"
#> [37] "substantive" "questions" "related" "to"
#> [41] "number" "of" "degrees" "and"
#> [45] "income" "4.3.1" "distributions" "of"
#> [49] "errors" "in" "reported" "acs"
#> [53] "education" "values" "table" "4"
#> [57] "displays" "the" "multiple" "imputation"
#> [61] "point" "estimates" "and" "95"
#> [65] "confidence" "intervals" "for" "the"
#> [69] "proportions" "of" "errors" "by"
#> [73] "gender" "and" "nscg" "education"
#> [77] "obtained" "from" "the" "m"
#> [81] "50" "draws" "of" "ei"
#> [85] "for" "all" "individuals" "in"
#> [89] "de" "we" "begin" "by"
#> [93] "comparing" "results" "for" "the"
#> [97] "set" "of" "models" "with"
#> [101] "flat" "prior" "distributions" "models"
#> [105] "1" "4" "and" "the"
#> [109] "cia" "model" "then" "move"
#> [113] "to" "the" "set" "of"
#> [117] "models" "with" "informative" "prior"
#> [121] "distributions" "models" "5" "7"
#> [125] "the" "cia" "model" "suggests"
#> [129] "extremely" "large" "error" "percentages"
#> [133] "especially" "for" "the" "highest"
#> [137] "education" "levels" "these" "rates"
#> [141] "seem" "unlikely" "to" "be"
#> [145] "reality" "leading" "us" "to"
#> [149] "reject" "the" "cia" "model"
#> [153] "the" "overall" "error" "rates"
#> [157] "for" "models" "1" "4"
#> [161] "are" "similar" "and" "more"
#> [165] "realistic" "than" "those" "from"
#> [169] "the" "cia" "model" "the"
#> [173] "differences" "in" "error" "estimates"
#> [177] "between" "model" "2" "and"
#> [181] "model" "1" "suggest" "that"
#> [185] "the" "probability" "of" "error"
#> [189] "depends" "on" "sex" "comparing"
#> [193] "results" "for" "model" "3"
#> [197] "and" "model" "1" "however"
#> [201] "we" "see" "little" "evidence"
#> [205] "of" "important" "race" "effects"
#> [209] "on" "the" "propensity" "to"
#> [213] "make" "errors" "model" "4"
#> [217] "generalizes" "model" "2" "by"
#> [221] "allowing" "the" "reporting" "probabilities"
#> [225] "to" "vary" "by" "sex"
#> [229] "if" "these" "probabilities" "were"
#> [233] "similar" "across" "sex" "in"
#> [237] "reality" "we" "would" "expect"
#> [241] "the" "two" "models" "to"
#> [245] "produce" "similar" "results" "however"
#> [249] "the" "estimated" "error" "rates"
#> [253] "are" "fairly" "different" "for"
#> [257] "example" "the" "estimated" "proportion"
#> [261] "of" "errors" "for" "female"
#> [265] "professionals" "from" "model" "4"
#> [269] "is" "about" "double" "that"
#> [273] "from" "model" "2" "to"
#> [277] "determine" "where" "the" "models"
#> [281] "differ" "most" "we" "examine"
#> [285] "the" "estimated" "reporting" "probabilities"
#> [289] "displayed" "in" "table" "5"
#> [293] "model" "4" "estimates" "some"
#> [297] "significant" "differences" "in" "reporting"
#> [301] "probabilities" "by" "gender" "for"
#> [305] "example" "20"
#>
#>
#> [[21]]
#> [[21]][[1]]
#> [1] "males" "with" "bachelor’s" "degrees"
#> [5] "who" "make" "a" "reporting"
#> [9] "error" "are" "estimated" "to"
#> [13] "report" "a" "master’s" "degree"
#> [17] "with" "probability" "96" "whereas"
#> [21] "females" "with" "bachelor’s" "degrees"
#> [25] "who" "make" "a" "reporting"
#> [29] "error" "are" "estimated" "to"
#> [33] "report" "a" "master’s" "degree"
#> [37] "with" "probability" "67" "and"
#> [41] "a" "professional" "degree" "with"
#> [45] "probability" "30" "other" "large"
#> [49] "differences" "exist" "for" "professional"
#> [53] "degree" "holders" "females" "with"
#> [57] "professional" "degrees" "who" "make"
#> [61] "a" "reporting" "error" "are"
#> [65] "most" "likely" "to" "report"
#> [69] "a" "bachelor’s" "degree" "whereas"
#> [73] "men" "with" "professional" "degrees"
#> [77] "who" "make" "a" "reporting"
#> [81] "error" "are" "most" "likely"
#> [85] "to" "report" "a" "master’s"
#> [89] "degree" "or" "ph" "d"
#> [93] "we" "note" "that" "some"
#> [97] "of" "the" "estimates" "for"
#> [101] "model" "4" "are" "based"
#> [105] "on" "small" "sample" "sizes"
#> [109] "which" "explains" "the" "wide"
#> [113] "standard" "errors" "turning" "to"
#> [117] "models" "5" "7" "we"
#> [121] "can" "see" "the" "impact"
#> [125] "of" "the" "informative" "prior"
#> [129] "distributions" "by" "comparing" "results"
#> [133] "in" "table" "4" "under"
#> [137] "these" "models" "to" "those"
#> [141] "for" "model" "4" "moving"
#> [145] "from" "model" "4" "to"
#> [149] "model" "5" "the" "most"
#> [153] "noticeable" "differences" "are" "for"
#> [157] "women" "with" "a" "ph"
#> [161] "d" "and" "men" "with"
#> [165] "a" "master’s" "degree" "for"
#> [169] "whom" "model" "5" "suggests"
#> [173] "lower" "error" "rates" "these"
#> [177] "groups" "have" "smaller" "sample"
#> [181] "sizes" "so" "that" "the"
#> [185] "data" "do" "not" "swamp"
#> [189] "the" "effects" "of" "the"
#> [193] "prior" "distribution" "when" "making"
#> [197] "the" "prior" "sample" "sizes"
#> [201] "very" "large" "as" "in"
#> [205] "models" "6" "and" "7"
#> [209] "the" "information" "in" "the"
#> [213] "prior" "distribution" "tends" "to"
#> [217] "overwhelm" "the" "information" "in"
#> [221] "the" "data" "we" "provide"
#> [225] "more" "thorough" "investigation" "of"
#> [229] "the" "impact" "of" "the"
#> [233] "prior" "specifications" "in" "the"
#> [237] "supplementary" "material" "of" "course"
#> [241] "we" "cannot" "be" "certain"
#> [245] "which" "model" "most" "closely"
#> [249] "reflects" "the" "true" "measure"
#> [253] "ment" "error" "mechanism" "the"
#> [257] "best" "we" "can" "do"
#> [261] "is" "perform" "diagnostic" "tests"
#> [265] "to" "see" "which" "models"
#> [269] "if" "any" "should" "be"
#> [273] "discounted" "as" "not" "adequately"
#> [277] "describing" "the" "observed" "data"
#> [281] "m" "for" "each" "acs"
#> [285] "imputed" "dataset" "de" "under"
#> [289] "each" "model" "we" "compute"
#> [293] "the" "sample" "pro" "m"
#> [297] "portions" "π̂xk" "and" "corresponding"
#> [301] "multiple" "imputation" "95" "confidence"
#> [305] "intervals" "for" "all" "165̇"
#> [309] "unique" "values" "of" "x"
#> [313] "y" "we" "determine" "how"
#> [317] "many" "of" "the" "80"
#> [321] "estimated" "population" "percentages" "of"
#> [325] "y" "x" "computed" "from"
#> [329] "the" "2010" "nscg" "using"
#> [333] "the" "estimated" "t̂x" "from"
#> [337] "the" "acs" "to" "back"
#> [341] "into" "an" "estimate" "of"
#> [345] "t̂x5" "fall" "within" "the"
#> [349] "multiple" "imputation" "95" "21"
#>
#>
#> [[22]]
#> [[22]][[1]]
#> [1] "confidence" "intervals" "models" "that"
#> [5] "yield" "low" "rates" "do"
#> [9] "not" "describe" "the" "data"
#> [13] "accurately" "for" "model" "1"
#> [17] "73" "of" "80" "nscg"
#> [21] "population" "share" "estimates" "are"
#> [25] "contained" "in" "the" "acs"
#> [29] "multiple" "imputation" "intervals" "corresponding"
#> [33] "counts" "are" "75" "for"
#> [37] "model" "2" "71" "for"
#> [41] "model" "3" "and" "76"
#> [45] "for" "model" "4" "these"
#> [49] "results" "suggest" "that" "model"
#> [53] "1" "and" "model" "3"
#> [57] "may" "be" "inferior" "to"
#> [61] "model" "2" "and" "model"
#> [65] "4" "for" "the" "models"
#> [69] "with" "informative" "prior" "distributions"
#> [73] "the" "counts" "are" "74"
#> [77] "for" "model" "5" "67"
#> [81] "for" "model" "6" "and"
#> [85] "54" "for" "model" "7"
#> [89] "although" "the" "prior" "beliefs"
#> [93] "in" "models" "6" "and"
#> [97] "7" "seem" "plausible" "at"
#> [101] "first" "glance" "the" "diagnostic"
#> [105] "suggests" "that" "they" "do"
#> [109] "not" "describe" "the" "2010"
#> [113] "data" "distributions" "as" "well"
#> [117] "as" "models" "4" "and"
#> [121] "5" "considering" "the" "results"
#> [125] "as" "well" "as" "the"
#> [129] "diagnostic" "check" "if" "we"
#> [133] "had" "to" "choose" "one"
#> [137] "model" "we" "would" "select"
#> [141] "model" "5" "it" "seems"
#> [145] "plausible" "that" "the" "probability"
#> [149] "of" "misreporting" "education" "as"
#> [153] "well" "as" "the" "reported"
#> [157] "value" "itself" "when" "errors"
#> [161] "are" "made" "depend" "on"
#> [165] "both" "sex" "and" "true"
#> [169] "education" "level" "additionally" "the"
#> [173] "prior" "distribution" "from" "the"
#> [177] "1993" "linked" "data" "pulls"
#> [181] "estimates" "in" "groups" "with"
#> [185] "little" "sample" "size" "to"
#> [189] "measurement" "error" "distributions" "that"
#> [193] "seem" "more" "plausible" "on"
#> [197] "face" "value" "however" "one"
#> [201] "need" "not" "use" "the"
#> [205] "data" "fusion" "framework" "for"
#> [209] "measurement" "error" "to" "select"
#> [213] "a" "single" "model" "rather"
#> [217] "one" "can" "use" "the"
#> [221] "framework" "to" "examine" "sensitivity"
#> [225] "of" "analyses" "to" "the"
#> [229] "different" "specifications" "4.3.2" "sensitivity"
#> [233] "analyses" "figure" "2" "displays"
#> [237] "the" "multiply" "imputed" "survey"
#> [241] "weighted" "inferences" "for" "the"
#> [245] "total" "number" "of" "women"
#> [249] "with" "science" "and" "engineering"
#> [253] "degrees" "computing" "using" "the"
#> [257] "acs" "specific" "indicator" "variable"
#> [261] "we" "show" "results" "for"
#> [265] "models" "4" "7" "the"
#> [269] "cia" "model" "and" "based"
#> [273] "on" "the" "acs" "data"
#> [277] "without" "any" "adjustment" "for"
#> [281] "misreporting" "education" "the" "confidence"
#> [285] "intervals" "for" "model" "4"
#> [289] "and" "model" "5" "overlap"
#> [293] "substantially" "suggesting" "not" "much"
#> [297] "practical" "difference" "in" "choosing"
#> [301] "among" "these" "models" "however"
#> [305] "both" "are" "noticeably" "different"
#> [309] "from" "the" "other" "models"
#> [313] "especially" "for" "the" "ph"
#> [317] "d" "and" "professional" "degrees"
#> [321] "as" "the" "prior" "distributions"
#> [325] "on" "the" "error" "rates"
#> [329] "get" "stronger" "the" "estimated"
#> [333] "counts" "increase" "towards" "22"
#>
#>
#> [[23]]
#> [[23]][[1]]
#> [1] "6" "x" "10" "bachelors"
#> [5] "degree" "6" "x" "10"
#> [9] "masters" "degree" "5.2" "2.6"
#> [13] "acs" "cia" "model" "model"
#> [17] "4" "5" "model" "5"
#> [21] "2.5" "estimated" "total" "no"
#> [25] "of" "sci" "and" "eng"
#> [29] "degrees" "estimated" "total" "no"
#> [33] "of" "sci" "and" "eng"
#> [37] "degrees" "model" "6" "model"
#> [41] "7" "4.8" "2.4" "awarded"
#> [45] "to" "women" "awarded" "to"
#> [49] "women" "4.6" "2.3" "4.4"
#> [53] "2.2" "4.2" "2.1" "4"
#> [57] "2" "acs" "cia" "m4"
#> [61] "m5" "m6" "m7" "acs"
#> [65] "cia" "m4" "m5" "m6"
#> [69] "m7" "model" "model" "5"
#> [73] "x" "10" "professional" "degree"
#> [77] "x" "10" "5" "phd"
#> [81] "degree" "7.5" "5" "7"
#> [85] "4.5" "estimated" "total" "no"
#> [89] "of" "sci" "and" "eng"
#> [93] "degrees" "6.5" "estimated" "total"
#> [97] "no" "of" "sci" "and"
#> [101] "eng" "degrees" "4" "6"
#> [105] "awarded" "to" "women" "awarded"
#> [109] "to" "women" "5.5" "3.5"
#> [113] "5" "3" "4.5" "2.5"
#> [117] "4" "2" "3.5" "3"
#> [121] "1.5" "acs" "cia" "m4"
#> [125] "m5" "m6" "m7" "acs"
#> [129] "cia" "m4" "m5" "m6"
#> [133] "m7" "model" "model" "figure"
#> [137] "2" "the" "estimated" "total"
#> [141] "number" "of" "science" "and"
#> [145] "engineering" "degrees" "awarded" "to"
#> [149] "women" "under" "each" "model"
#> [153] "we" "plot" "the" "mean"
#> [157] "and" "95" "confidence" "intervals"
#> [161] "note" "the" "difference" "in"
#> [165] "scale" "for" "each" "degree"
#> [169] "category" "the" "estimate" "using"
#> [173] "the" "acs" "reported" "education"
#> [177] "we" "note" "that" "using"
#> [181] "the" "acs" "reported" "education"
#> [185] "without" "adjustments" "results" "in"
#> [189] "substantially" "higher" "estimated" "totals"
#> [193] "at" "the" "professional" "and"
#> [197] "ph" "d" "levels" "than"
#> [201] "any" "of" "the" "models"
#> [205] "that" "account" "for" "measurement"
#> [209] "error" "we" "also" "note"
#> [213] "that" "the" "cia" "model"
#> [217] "yields" "considerably" "lower" "counts"
#> [221] "for" "all" "but" "bachelor’s"
#> [225] "degrees" "figure" "3" "displays"
#> [229] "inferences" "for" "the" "average"
#> [233] "income" "for" "different" "degrees"
#> [237] "for" "most" "degrees" "the"
#> [241] "point" "estimates" "for" "models"
#> [245] "4" "7" "are" "reasonably"
#> [249] "close" "with" "models" "4"
#> [253] "23"
#>
#>
#> [[24]]
#> [[24]][[1]]
#> [1] "and" "5" "again" "giving"
#> [5] "similar" "results" "the" "estimated"
#> [9] "average" "income" "for" "professionals"
#> [13] "differs" "noticeably" "across" "models"
#> [17] "with" "model" "4" "and"
#> [21] "model" "5" "suggesting" "lower"
#> [25] "averages" "than" "the" "unadjusted"
#> [29] "acs" "estimates" "or" "than"
#> [33] "models" "6" "and" "7"
#> [37] "we" "note" "that" "the"
#> [41] "cia" "model" "estimates" "are"
#> [45] "clearly" "implausible" "as" "an"
#> [49] "independent" "check" "on" "these"
#> [53] "estimates" "we" "considered" "the"
#> [57] "estimated" "average" "earnings" "in"
#> [61] "the" "2010" "current" "population"
#> [65] "survey" "they" "are" "83,720"
#> [69] "for" "professional" "80,600" "for"
#> [73] "ph" "d" "degree" "66,144"
#> [77] "for" "master’s" "degree" "and"
#> [81] "53,976" "for" "bachelor’s" "degree"
#> [85] "http" "www" "collegequest" "com"
#> [89] "bls" "research" "education" "pays"
#> [93] "2010" "aspx" "these" "line"
#> [97] "up" "more" "closely" "with"
#> [101] "the" "estimates" "from" "model"
#> [105] "5" "than" "any" "other"
#> [109] "model" "especially" "for" "the"
#> [113] "professional" "degree" "category" "where"
#> [117] "the" "estimates" "most" "differ"
#> [121] "figure" "4" "displays" "inferences"
#> [125] "for" "the" "average" "income"
#> [129] "for" "men" "and" "women"
#> [133] "all" "models" "support" "the"
#> [137] "conclusion" "that" "men" "make"
#> [141] "more" "than" "women" "apparently"
#> [145] "misreporting" "in" "education" "does"
#> [149] "not" "account" "for" "that"
#> [153] "gap" "at" "least" "for"
#> [157] "the" "models" "considered" "here"
#> [161] "we" "note" "that" "model"
#> [165] "4" "suggests" "potentially" "larger"
#> [169] "income" "gaps" "between" "male"
#> [173] "and" "female" "ph" "d"
#> [177] "recipients" "than" "the" "other"
#> [181] "models" "5" "concluding" "remarks"
#> [185] "the" "framework" "presented" "in"
#> [189] "this" "article" "offers" "analysts"
#> [193] "tools" "for" "using" "the"
#> [197] "information" "in" "a" "high"
#> [201] "quality" "separate" "data" "source"
#> [205] "to" "adjust" "for" "measurement"
#> [209] "errors" "in" "the" "database"
#> [213] "of" "interest" "key" "to"
#> [217] "the" "framework" "is" "to"
#> [221] "replace" "conditional" "independence" "assumptions"
#> [225] "typically" "used" "in" "data"
#> [229] "fusion" "with" "carefully" "considered"
#> [233] "measurement" "error" "models" "this"
#> [237] "avoids" "sacrificing" "information" "and"
#> [241] "facilitates" "analysis" "of" "the"
#> [245] "sensitivity" "of" "conclusions" "to"
#> [249] "alternative" "measurement" "error" "specifications"
#> [253] "analysts" "can" "use" "diagnostic"
#> [257] "tests" "to" "rule" "out"
#> [261] "some" "measurement" "error" "models"
#> [265] "and" "perform" "sensibility" "tests"
#> [269] "on" "others" "to" "identify"
#> [273] "reasonable" "candidates" "24"
#>
#>
#> [[25]]
#> [[25]][[1]]
#> [1] "4" "x" "10" "10"
#> [5] "acs" "cia" "model" "model"
#> [9] "4" "9" "model" "5"
#> [13] "model" "6" "model" "7"
#> [17] "8" "estimated" "average" "income"
#> [21] "7" "6" "5" "4"
#> [25] "3" "ba" "ma" "prof"
#> [29] "phd" "none" "education" "level"
#> [33] "figure" "3" "multiple" "imputation"
#> [37] "point" "and" "95" "confidence"
#> [41] "interval" "estimates" "for" "the"
#> [45] "average" "income" "within" "each"
#> [49] "education" "level" "the" "acs"
#> [53] "estimate" "is" "the" "survey"
#> [57] "weighted" "estimate" "based" "on"
#> [61] "the" "reported" "education" "level"
#> [65] "in" "the" "2010" "acs"
#> [69] "besides" "survey" "sampling" "contexts"
#> [73] "like" "the" "one" "considered"
#> [77] "here" "involving" "the" "acs"
#> [81] "and" "nscg" "the" "framework"
#> [85] "offers" "potential" "approaches" "for"
#> [89] "dealing" "with" "possible" "mea"
#> [93] "surement" "errors" "in" "organic"
#> [97] "big" "data" "this" "is"
#> [101] "increasingly" "important" "as" "data"
#> [105] "stewards" "and" "analysts" "consider"
#> [109] "replacing" "or" "supplementing" "high"
#> [113] "quality" "but" "expensive" "surveys"
#> [117] "with" "inexpensive" "and" "large"
#> [121] "sample" "organic" "data" "often"
#> [125] "scant" "attention" "is" "paid"
#> [129] "to" "the" "potential" "impact"
#> [133] "of" "measurement" "errors" "on"
#> [137] "inferences" "from" "those" "data"
#> [141] "the" "framework" "could" "be"
#> [145] "used" "with" "high" "quality"
#> [149] "validated" "surveys" "as" "the"
#> [153] "gold" "standard" "data" "allowing"
#> [157] "for" "adjustments" "to" "the"
#> [161] "error" "prone" "organic" "data"
#> [165] "25"
#>
#>
#> [[26]]
#> [[26]][[1]]
#> [1] "4" "x" "10"
#> [4] "bachelors" "degree" "4"
#> [7] "x" "10" "masters"
#> [10] "degree" "7" "8"
#> [13] "male" "small" "marker"
#> [16] "female" "large" "marker"
#> [19] "6.5" "7.5" "7"
#> [22] "estimated" "average" "income"
#> [25] "by" "gender" "estimated"
#> [28] "average" "income" "by"
#> [31] "gender" "6" "6.5"
#> [34] "5.5" "6" "5"
#> [37] "5.5" "4.5" "5"
#> [40] "4" "4.5" "3.5"
#> [43] "4" "3" "3.5"
#> [46] "acs" "cia" "m4"
#> [49] "m5" "m6" "m7"
#> [52] "acs" "cia" "m4"
#> [55] "m5" "m6" "m7"
#> [58] "model" "specification" "model"
#> [61] "specification" "4" "x"
#> [64] "10" "professional" "degree"
#> [67] "x" "10" "4"
#> [70] "phd" "degree" "12"
#> [73] "10" "11" "9"
#> [76] "10" "estimated" "average"
#> [79] "income" "by" "gender"
#> [82] "estimated" "average" "income"
#> [85] "by" "gender" "8"
#> [88] "9" "8" "7"
#> [91] "7" "6" "6"
#> [94] "5" "5" "4"
#> [97] "4" "3" "3"
#> [100] "acs" "cia" "m4"
#> [103] "m5" "m6" "m7"
#> [106] "acs" "cia" "m4"
#> [109] "m5" "m6" "m7"
#> [112] "model" "specification" "model"
#> [115] "specification" "figure" "4"
#> [118] "multiple" "imputation" "point"
#> [121] "and" "95" "confidence"
#> [124] "interval" "estimates" "for"
#> [127] "the" "average" "income"
#> [130] "for" "men" "and"
#> [133] "women" "within" "each"
#> [136] "education" "level" "the"
#> [139] "acs" "estimate" "is"
#> [142] "the" "survey" "weighted"
#> [145] "estimate" "based" "on"
#> [148] "the" "reported" "education"
#> [151] "level" "in" "the"
#> [154] "2010" "acs" "supplementary"
#> [157] "materials" "all" "supplemental"
#> [160] "files" "listed" "below"
#> [163] "are" "contained" "in"
#> [166] "a" "single" "zip"
#> [169] "file" "supplementary" "zip"
#> [172] "and" "can" "be"
#> [175] "obtained" "via" "a"
#> [178] "single" "download" "supplementary"
#> [181] "results" "supplementary" "details"
#> [184] "and" "additional" "results"
#> [187] "for" "paper" "supp"
#> [190] "material" "final" "pdf"
#> [193] "acs" "data" "2010"
#> [196] "acs" "data" "used"
#> [199] "in" "the" "paper"
#> [202] "acsdata" "2010standardized" "csv"
#> [205] "zip" "26"
#>
#>
#> [[27]]
#> [[27]][[1]]
#> [1] "matlab" "code" "matlab"
#> [4] "files" "containing" "main"
#> [7] "code" "maincode" "edu"
#> [10] "2010app" "report1993" "m"
#> [13] "and" "helper" "functions"
#> [16] "design" "m" "and"
#> [19] "dirsamp" "m" "as"
#> [22] "well" "as" "parameter"
#> [25] "files" "mu" "mat"
#> [28] "and" "tauspd" "mat"
#> [31] "code" "zip" "prior"
#> [34] "distributions" "csv" "files"
#> [37] "are" "provided" "for"
#> [40] "priors" "used" "in"
#> [43] "model" "5" "and"
#> [46] "read" "in" "by"
#> [49] "main" "matlab" "code"
#> [52] "referred" "to" "as"
#> [55] "femalereportprior1993" "csv" "malereport"
#> [58] "prior1993" "csv" "betareportprior"
#> [61] "csv" "priors" "zip"
#> [64] "references" "abayomi" "k"
#> [67] "gelman" "a" "and"
#> [70] "levy" "m" "2008"
#> [73] "diagnostics" "for" "multivariate"
#> [76] "impu" "tations" "journal"
#> [79] "of" "the" "royal"
#> [82] "statistical" "society" "series"
#> [85] "c" "applied" "statistics"
#> [88] "57" "273" "291"
#> [91] "black" "d" "haviland"
#> [94] "a" "sanders" "s"
#> [97] "and" "taylor" "l"
#> [100] "2006" "why" "do"
#> [103] "minority" "men" "earn"
#> [106] "less" "a" "study"
#> [109] "of" "wage" "differentials"
#> [112] "among" "the" "highly"
#> [115] "educated" "the" "review"
#> [118] "of" "economics" "and"
#> [121] "statistics" "88" "300"
#> [124] "313" "black" "d"
#> [127] "sanders" "s" "and"
#> [130] "taylor" "l" "2003"
#> [133] "measurement" "of" "higher"
#> [136] "education" "in" "the"
#> [139] "census" "and" "current"
#> [142] "population" "survey" "journal"
#> [145] "of" "the" "american"
#> [148] "statistical" "association" "98"
#> [151] "545" "554" "black"
#> [154] "d" "a" "haviland"
#> [157] "a" "m" "sanders"
#> [160] "s" "g" "and"
#> [163] "taylor" "l" "j"
#> [166] "2008" "gender" "wage"
#> [169] "disparities" "among" "the"
#> [172] "highly" "educated" "journal"
#> [175] "of" "human" "resources"
#> [178] "43" "630" "659"
#> [181] "carrig" "m" "manrique"
#> [184] "vallier" "d" "ranby"
#> [187] "k" "reiter" "j"
#> [190] "p" "and" "hoyle"
#> [193] "r" "2015" "a"
#> [196] "multiple" "imputation" "based"
#> [199] "method" "for" "the"
#> [202] "retrospective" "harmonization" "of"
#> [205] "data" "sets" "multivariate"
#> [208] "behavioral" "research" "50"
#> [211] "383" "397" "curran"
#> [214] "p" "j" "and"
#> [217] "hussong" "a" "m"
#> [220] "2009" "integrative" "data"
#> [223] "analysis" "the" "simultaneous"
#> [226] "analysis" "of" "multiple"
#> [229] "data" "sets" "psychological"
#> [232] "methods" "14" "81"
#> [235] "100" "d’orazio" "m"
#> [238] "di" "zio" "m"
#> [241] "and" "scanu" "m"
#> [244] "2006" "statistical" "matching"
#> [247] "theory" "and" "practice"
#> [250] "hoboken" "nj" "wiley"
#> [253] "dunson" "d" "b"
#> [256] "and" "xing" "c"
#> [259] "2009" "nonparametric" "bayes"
#> [262] "modeling" "of" "multivariate"
#> [265] "categorical" "data" "journal"
#> [268] "of" "the" "american"
#> [271] "statistical" "association" "104"
#> [274] "1042" "1051" "fesco"
#> [277] "r" "s" "frase"
#> [280] "m" "j" "and"
#> [283] "kannankutty" "n" "2012"
#> [286] "using" "the" "american"
#> [289] "commu" "nity" "survey"
#> [292] "as" "the" "sampling"
#> [295] "frame" "for" "the"
#> [298] "national" "survey" "of"
#> [301] "college" "graduates" "working"
#> [304] "paper" "ncses" "12"
#> [307] "201" "national" "science"
#> [310] "foundation" "national" "center"
#> [313] "for" "science" "and"
#> [316] "engineering" "statistics" "arlington"
#> [319] "va" "27"
#>
#>
#> [[28]]
#> [[28]][[1]]
#> [1] "finamore" "j" "2013" "national"
#> [5] "survey" "of" "college" "graduates"
#> [9] "about" "the" "survey" "na"
#> [13] "tional" "center" "for" "science"
#> [17] "and" "engineering" "statistics" "fosdick"
#> [21] "b" "k" "deyoreo" "m"
#> [25] "and" "reiter" "j" "p"
#> [29] "2016" "categorical" "data" "fusion"
#> [33] "using" "auxiliary" "information" "annals"
#> [37] "of" "applied" "statistics" "to"
#> [41] "appear" "he" "y" "landrum"
#> [45] "m" "b" "and" "zaslavksy"
#> [49] "a" "m" "2014" "combining"
#> [53] "information" "from" "two" "data"
#> [57] "sources" "with" "misreporting" "and"
#> [61] "incompleteness" "to" "assess" "hospice"
#> [65] "use" "among" "cancer" "patients"
#> [69] "a" "multiple" "imputation" "appraoch"
#> [73] "statistics" "in" "medicine" "33"
#> [77] "3710" "3724" "hirano" "k"
#> [81] "imbens" "g" "ridder" "g"
#> [85] "and" "rubin" "d" "2001"
#> [89] "combining" "panel" "data" "sets"
#> [93] "with" "attrition" "and" "refreshment"
#> [97] "samples" "econometrica" "69" "1645"
#> [101] "1659" "kim" "h" "j"
#> [105] "cox" "l" "h" "karr"
#> [109] "a" "f" "reiter" "j"
#> [113] "p" "and" "wang" "q"
#> [117] "2015" "simultane" "ous" "edit"
#> [121] "imputation" "for" "continuous" "microdata"
#> [125] "journal" "of" "the" "american"
#> [129] "statistical" "association" "110" "987"
#> [133] "999" "lohr" "s" "l"
#> [137] "2010" "sampling" "design" "and"
#> [141] "analysis" "boston" "ma" "brooks"
#> [145] "cole" "2nd" "ed" "manrique"
#> [149] "vallier" "d" "and" "reiter"
#> [153] "j" "p" "2016" "bayesian"
#> [157] "simultaneous" "edit" "and" "impu"
#> [161] "tation" "for" "multivariate" "categorical"
#> [165] "data" "journal" "of" "the"
#> [169] "american" "statistical" "asso" "ciation"
#> [173] "to" "appear" "moriarity" "c"
#> [177] "and" "scheuren" "f" "2001"
#> [181] "statistical" "matching" "a" "paradigm"
#> [185] "for" "assessing" "the" "uncertainty"
#> [189] "in" "the" "procedure" "journal"
#> [193] "of" "official" "statistics" "17"
#> [197] "407" "422" "national" "science"
#> [201] "foundation" "1993" "national" "survey"
#> [205] "of" "college" "graduates" "1993"
#> [209] "http" "doi" "org" "10.3886"
#> [213] "icpsr06880" "v1" "icpsr06880" "v1"
#> [217] "ann" "arbor" "mi" "inter"
#> [221] "university" "consortium" "for" "political"
#> [225] "and" "social" "research" "distributor"
#> [229] "2014" "10" "02" "pepe"
#> [233] "m" "s" "1992" "inference"
#> [237] "using" "surrogate" "outcome" "data"
#> [241] "and" "a" "validation" "sample"
#> [245] "biometrika" "79" "355" "365"
#> [249] "raghunathan" "t" "e" "2006"
#> [253] "combining" "information" "from" "multiple"
#> [257] "surveys" "for" "assess" "ing"
#> [261] "health" "disparities" "allgemeines" "statistisches"
#> [265] "archiv" "90" "515" "526"
#> [269] "rassler" "s" "2002" "statistical"
#> [273] "matching" "new" "york" "springer"
#> [277] "reiter" "j" "2008" "multiple"
#> [281] "imputation" "when" "records" "used"
#> [285] "for" "imputation" "are" "not"
#> [289] "used" "or" "disseminated" "for"
#> [293] "analysis" "biometrika" "95" "933"
#> [297] "946" "reiter" "j" "p"
#> [301] "2012" "bayesian" "finite" "population"
#> [305] "imputation" "for" "data" "fusion"
#> [309] "statistica" "sinica" "22" "795"
#> [313] "811" "rubin" "d" "b"
#> [317] "1986" "statistical" "matching" "using"
#> [321] "file" "concatenation" "with" "adjusted"
#> [325] "weights" "and" "multiple" "imputations"
#> [329] "journal" "of" "business" "economic"
#> [333] "statistics" "4" "87" "94"
#> [337] "28"
#>
#>
#> [[29]]
#> [[29]][[1]]
#> [1] "1987" "multiple" "imputation" "for"
#> [5] "nonresponse" "in" "surveys" "new"
#> [9] "york" "john" "wiley" "sons"
#> [13] "schenker" "n" "and" "raghunathan"
#> [17] "t" "e" "2007" "combining"
#> [21] "information" "from" "multiple" "surveys"
#> [25] "to" "enhance" "estimation" "of"
#> [29] "measures" "of" "health" "statistics"
#> [33] "in" "medicine" "26" "1802"
#> [37] "1811" "schenker" "n" "raghunathan"
#> [41] "t" "e" "and" "bondarenko"
#> [45] "i" "2010" "improving" "on"
#> [49] "analyses" "of" "self" "reported"
#> [53] "data" "in" "a" "large"
#> [57] "scale" "health" "survey" "by"
#> [61] "using" "information" "from" "an"
#> [65] "examination" "based" "survey" "statistics"
#> [69] "in" "medicine" "29" "533"
#> [73] "545" "schifeling" "t" "a"
#> [77] "cheng" "c" "reiter" "j"
#> [81] "p" "and" "hillygus" "d"
#> [85] "s" "2015" "accounting" "for"
#> [89] "nonignorable" "unit" "nonresponse" "and"
#> [93] "attrition" "in" "panel" "studies"
#> [97] "with" "refreshment" "samples" "journal"
#> [101] "of" "survey" "statistics" "and"
#> [105] "methodology" "3" "265" "295"
#> [109] "si" "y" "and" "reiter"
#> [113] "j" "2013" "nonparametric" "bayesian"
#> [117] "multiple" "imputation" "for" "incom"
#> [121] "plete" "categorical" "variables" "in"
#> [125] "large" "scale" "assessment" "surveys"
#> [129] "journal" "of" "educational" "and"
#> [133] "behavioral" "statistics" "38" "499"
#> [137] "521" "si" "y" "reiter"
#> [141] "j" "p" "and" "hillygus"
#> [145] "d" "s" "2015" "semi"
#> [149] "parametric" "selection" "models" "for"
#> [153] "potentially" "non" "ignorable" "attrition"
#> [157] "in" "panel" "studies" "with"
#> [161] "refreshment" "samples" "political" "analysis"
#> [165] "23" "92" "112" "siddique"
#> [169] "j" "reiter" "j" "p"
#> [173] "brincks" "a" "gibbons" "r"
#> [177] "d" "crespi" "c" "m"
#> [181] "and" "brown" "c" "h"
#> [185] "2015" "multiple" "imputation" "for"
#> [189] "harmonizing" "longitudinal" "non" "commensurate"
#> [193] "measures" "in" "individual" "participant"
#> [197] "data" "meta" "analysis" "statistics"
#> [201] "in" "medicine" "34" "3399"
#> [205] "3414" "tarmast" "g" "2001"
#> [209] "multivariate" "log" "normal" "distribution"
#> [213] "in" "international" "statistical" "institute"
#> [217] "seoul" "53rd" "session" "yucel"
#> [221] "r" "m" "and" "zaslavsky"
#> [225] "a" "m" "2005" "imputation"
#> [229] "of" "binary" "treatment" "variables"
#> [233] "with" "measurement" "error" "in"
#> [237] "administrative" "data" "journal" "of"
#> [241] "the" "american" "statistical" "association"
#> [245] "100" "1123" "1132" "29"
#>
#>
#> [[30]]
#> [[30]][[1]]
#> [1] "table" "4" "error" "rate"
#> [5] "estimates" "from" "different" "model"
#> [9] "specifications" "models" "1" "7"
#> [13] "are" "run" "for" "100,000"
#> [17] "mcmc" "iterations" "we" "save"
#> [21] "m" "50" "completed" "datasets"
#> [25] "under" "each" "model" "for"
#> [29] "each" "dataset" "we" "compute"
#> [33] "the" "estimated" "overall" "error"
#> [37] "rate" "estimated" "error" "rate"
#> [41] "by" "gender" "and" "imputed"
#> [45] "y" "and" "associated" "variances"
#> [49] "using" "ratio" "estimators" "that"
#> [53] "incorporate" "the" "acs" "final"
#> [57] "survey" "weights" "estimate" "estimate"
#> [61] "by" "group" "overall" "y"
#> [65] "ba" "y" "ma" "y"
#> [69] "prof" "y" "phd" "cia"
#> [73] "model" "male" "37" "36"
#> [77] "37" "76" "75" "76"
#> [81] "91" "91" "92" "94"
#> [85] "93" "95" "57" "55"
#> [89] "58" "female" "35" "35"
#> [93] "36" "72" "71" "72"
#> [97] "95" "94" "95" "97"
#> [101] "96" "97" "model" "1"
#> [105] "male" "05" "04" "06"
#> [109] "10" "08" "11" "18"
#> [113] "15" "21" "27" "23"
#> [117] "31" "17" "16" "19"
#> [121] "female" "05" "05" "06"
#> [125] "09" "08" "10" "18"
#> [129] "15" "21" "28" "24"
#> [133] "32" "model" "2" "male"
#> [137] "05" "04" "06" "18"
#> [141] "16" "21" "27" "18"
#> [145] "37" "36" "30" "42"
#> [149] "20" "18" "21" "female"
#> [153] "05" "05" "06" "12"
#> [157] "10" "14" "26" "20"
#> [161] "33" "41" "29" "53"
#> [165] "model" "3" "male" "05"
#> [169] "04" "06" "09" "08"
#> [173] "11" "17" "14" "20"
#> [177] "25" "21" "30" "17"
#> [181] "16" "19" "female" "05"
#> [185] "05" "06" "09" "08"
#> [189] "10" "17" "14" "20"
#> [193] "26" "21" "31" "model"
#> [197] "4" "male" "05" "04"
#> [201] "06" "19" "16" "23"
#> [205] "36" "26" "46" "36"
#> [209] "27" "45" "22" "20"
#> [213] "24" "female" "09" "08"
#> [217] "10" "14" "11" "17"
#> [221] "52" "44" "59" "55"
#> [225] "40" "70" "model" "5"
#> [229] "male" "07" "06" "08"
#> [233] "19" "16" "22" "23"
#> [237] "14" "32" "34" "27"
#> [241] "41" "22" "20" "24"
#> [245] "female" "09" "08" "10"
#> [249] "12" "09" "15" "50"
#> [253] "43" "57" "31" "17"
#> [257] "46" "model" "6" "male"
#> [261] "05" "05" "05" "09"
#> [265] "08" "10" "10" "09"
#> [269] "11" "10" "09" "11"
#> [273] "16" "14" "17" "female"
#> [277] "05" "04" "05" "06"
#> [281] "05" "07" "16" "14"
#> [285] "18" "07" "06" "09"
#> [289] "model" "7" "male" "01"
#> [293] "01" "01" "01" "00"
#> [297] "01" "00" "00" "01"
#> [301] "01" "00" "01" "11"
#> [305] "09" "13" "female" "01"
#> [309] "01" "01" "01" "01"
#> [313] "01" "01" "00" "01"
#> [317] "01" "00" "01" "30"
#>
#>
#> [[31]]
#> [[31]][[1]]
#> [1] "table" "5" "estimated" "mean"
#> [5] "and" "95" "confidence" "interval"
#> [9] "of" "reporting" "probabilities" "under"
#> [13] "model" "2" "and" "reporting"
#> [17] "probabilities" "by" "gender" "under"
#> [21] "model" "4" "z" "ba"
#> [25] "z" "ma" "z" "prof"
#> [29] "z" "phd" "y" "ba"
#> [33] "model" "2" "95" "87"
#> [37] "1.00" "04" "00" "11"
#> [41] "01" "00" "03" "model"
#> [45] "4" "male" "96" "90"
#> [49] "1.00" "02" "00" "07"
#> [53] "02" "00" "05" "model"
#> [57] "4" "female" "67" "58"
#> [61] "76" "30" "22" "38"
#> [65] "03" "00" "07" "y"
#> [69] "ma" "model" "2" "02"
#> [73] "00" "06" "51" "43"
#> [77] "59" "47" "39" "55"
#> [81] "model" "4" "male" "04"
#> [85] "00" "11" "57" "48"
#> [89] "66" "39" "31" "47"
#> [93] "model" "4" "female" "11"
#> [97] "00" "25" "39" "26"
#> [101] "52" "50" "40" "61"
#> [105] "y" "prof" "model" "2"
#> [109] "05" "00" "16" "69"
#> [113] "54" "83" "26" "14"
#> [117] "38" "model" "4" "male"
#> [121] "02" "00" "06" "69"
#> [125] "44" "94" "29" "04"
#> [129] "54" "model" "4" "female"
#> [133] "91" "79" "1.00" "06"
#> [137] "00" "16" "04" "00"
#> [141] "10" "y" "phd" "model"
#> [145] "2" "01" "00" "04"
#> [149] "39" "15" "63" "60"
#> [153] "36" "83" "model" "4"
#> [157] "male" "01" "00" "05"
#> [161] "21" "02" "39" "78"
#> [165] "60" "96" "model" "4"
#> [169] "female" "10" "00" "30"
#> [173] "77" "50" "1.00" "13"
#> [177] "00" "34" "y" "none"
#> [181] "model" "2" "95" "95"
#> [185] "96" "03" "03" "04"
#> [189] "01" "01" "01" "00"
#> [193] "00" "00" "model" "4"
#> [197] "male" "97" "96" "97"
#> [201] "03" "02" "03" "01"
#> [205] "00" "01" "00" "00"
#> [209] "00" "model" "4" "female"
#> [213] "96" "95" "97" "04"
#> [217] "03" "05" "00" "00"
#> [221] "00" "00" "00" "00"
#> [225] "31"
#>
#>