Ability to tokenize words.

convert_tokens(
  x,
  path = FALSE,
  split_pdf = FALSE,
  remove_hyphen = TRUE,
  token_function = NULL
)

Arguments

x: The text of the pdf file. This can be specified directly, or the pdftools package can be used to read the pdf file from a file path. To use pdftools, the path argument must be set to TRUE.
path: TRUE/FALSE indicating whether x is a path to the location of the pdf to be converted to text. Default is FALSE. When TRUE, the pdftools package is used for this conversion.
split_pdf: TRUE/FALSE indicating whether to split the pdf using white space. This would be most useful with multicolumn pdf files. The split_pdf function attempts to recreate the column layout of the text into a single column starting with the left column and proceeding to the right.
remove_hyphen: TRUE/FALSE indicating whether hyphenated words should be adjusted to combine onto a single line. Default is TRUE.
token_function: A function from the tokenizers package used to split the text into tokens. Default is NULL, in which case the tokenize_words function is used.

Value

A list of character vectors containing the tokens. More detail can be found by looking at the documentation of the tokenizers package.

Examples

 file <- system.file('pdf', '1610.00147.pdf', package = 'pdfsearch')
 convert_tokens(file, path = TRUE) 
#> [[1]]
#> [[1]][[1]]
#>   [1] "data"           "fusion"         "for"            "correcting"    
#>   [5] "measurement"    "errors"         "tracy"          "schifeling"    
#>   [9] "jerome"         "p"              "reiter"         "maria"         
#>  [13] "deyoreo"        "arxiv"          "1610.00147v1"   "stat"          
#>  [17] "me"             "1"              "oct"            "2016"          
#>  [21] "abstract"       "often"          "in"             "surveys"       
#>  [25] "key"            "items"          "are"            "subject"       
#>  [29] "to"             "measurement"    "errors"         "given"         
#>  [33] "just"           "the"            "data"           "it"            
#>  [37] "can"            "be"             "difficult"      "to"            
#>  [41] "determine"      "the"            "distribution"   "of"            
#>  [45] "this"           "error"          "process"        "and"           
#>  [49] "hence"          "to"             "obtain"         "accurate"      
#>  [53] "inferences"     "that"           "involve"        "the"           
#>  [57] "error"          "prone"          "variables"      "in"            
#>  [61] "some"           "settings"       "however"        "analysts"      
#>  [65] "have"           "access"         "to"             "a"             
#>  [69] "data"           "source"         "on"             "different"     
#>  [73] "in"             "dividuals"      "with"           "high"          
#>  [77] "quality"        "measurements"   "of"             "the"           
#>  [81] "error"          "prone"          "survey"         "items"         
#>  [85] "we"             "present"        "a"              "data"          
#>  [89] "fusion"         "framework"      "for"            "leveraging"    
#>  [93] "this"           "information"    "to"             "improve"       
#>  [97] "infer"          "ences"          "in"             "the"           
#> [101] "error"          "prone"          "survey"         "the"           
#> [105] "basic"          "idea"           "is"             "to"            
#> [109] "posit"          "models"         "about"          "the"           
#> [113] "rates"          "at"             "which"          "individuals"   
#> [117] "make"           "errors"         "coupled"        "with"          
#> [121] "models"         "for"            "the"            "values"        
#> [125] "reported"       "when"           "errors"         "are"           
#> [129] "made"           "this"           "can"            "avoid"         
#> [133] "the"            "unrealistic"    "assumption"     "of"            
#> [137] "conditional"    "independence"   "typically"      "used"          
#> [141] "in"             "data"           "fusion"         "we"            
#> [145] "apply"          "the"            "approach"       "on"            
#> [149] "the"            "re"             "ported"         "values"        
#> [153] "of"             "educational"    "attainments"    "in"            
#> [157] "the"            "american"       "community"      "survey"        
#> [161] "using"          "the"            "national"       "survey"        
#> [165] "of"             "college"        "graduates"      "as"            
#> [169] "the"            "high"           "quality"        "data"          
#> [173] "source"         "in"             "doing"          "so"            
#> [177] "we"             "account"        "for"            "the"           
#> [181] "informative"    "sampling"       "design"         "used"          
#> [185] "to"             "select"         "the"            "national"      
#> [189] "survey"         "of"             "college"        "graduates"     
#> [193] "we"             "also"           "present"        "a"             
#> [197] "process"        "for"            "assessing"      "the"           
#> [201] "sensitivity"    "of"             "various"        "analyses"      
#> [205] "to"             "different"      "choices"        "for"           
#> [209] "the"            "measurement"    "error"          "models"        
#> [213] "supplemental"   "material"       "is"             "available"     
#> [217] "online"         "key"            "words"          "fusion"        
#> [221] "imputation"     "measurement"    "error"          "missing"       
#> [225] "survey"         "this"           "research"       "was"           
#> [229] "supported"      "by"             "the"            "national"      
#> [233] "science"        "foundation"     "under"          "award"         
#> [237] "ses"            "11"             "31897"          "the"           
#> [241] "authors"        "wish"           "to"             "thank"         
#> [245] "seth"           "sanders"        "for"            "his"           
#> [249] "input"          "on"             "informative"    "prior"         
#> [253] "specifications" "and"            "mauricio"       "sadinle"       
#> [257] "for"            "discussion"     "that"           "improved"      
#> [261] "the"            "strategy"       "for"            "accounting"    
#> [265] "for"            "the"            "informative"    "sample"        
#> [269] "design"         "1"             
#> 
#> 
#> [[2]]
#> [[2]][[1]]
#>   [1] "1"             "introduction"  "survey"        "data"         
#>   [5] "often"         "contain"       "items"         "that"         
#>   [9] "are"           "subject"       "to"            "measurement"  
#>  [13] "errors"        "for"           "example"       "some"         
#>  [17] "respondents"   "might"         "misunderstand" "a"            
#>  [21] "question"      "or"            "accidentally"  "select"       
#>  [25] "the"           "wrong"         "response"      "thereby"      
#>  [29] "providing"     "values"        "unequal"       "to"           
#>  [33] "their"         "factual"       "values"        "left"         
#>  [37] "uncorrected"   "these"         "measurement"   "errors"       
#>  [41] "can"           "result"        "in"            "degraded"     
#>  [45] "inferences"    "kim"           "et"            "al"           
#>  [49] "2015"          "unfor"         "tunately"      "the"          
#>  [53] "distribution"  "of"            "the"           "measurement"  
#>  [57] "errors"        "typically"     "is"            "not"          
#>  [61] "estimable"     "from"          "the"           "survey"       
#>  [65] "data"          "alone"         "one"           "either"       
#>  [69] "needs"         "to"            "make"          "strong"       
#>  [73] "assumptions"   "about"         "the"           "measure"      
#>  [77] "ment"          "error"         "process"       "e"            
#>  [81] "g"             "as"            "in"            "curran"       
#>  [85] "and"           "hussong"       "2009"          "or"           
#>  [89] "leverage"      "information"   "from"          "some"         
#>  [93] "other"         "source"        "of"            "data"         
#>  [97] "as"            "we"            "do"            "here"         
#> [101] "one"           "natural"       "source"        "of"           
#> [105] "information"   "is"            "a"             "validation"   
#> [109] "sample"        "i"             "e"             "a"            
#> [113] "dataset"       "with"          "both"          "the"          
#> [117] "reported"      "possibly"      "erroneous"     "values"       
#> [121] "and"           "the"           "true"          "values"       
#> [125] "measured"      "on"            "the"           "same"         
#> [129] "individuals"   "these"         "individuals"   "could"        
#> [133] "be"            "a"             "subset"        "of"           
#> [137] "the"           "original"      "survey"        "pepe"         
#> [141] "1992"          "yucel"         "and"           "zaslavsky"    
#> [145] "2005"          "or"            "a"             "completely"   
#> [149] "distinct"      "set"           "raghunathan"   "2006"         
#> [153] "schenker"      "and"           "raghunathan"   "2007"         
#> [157] "schenker"      "et"            "al"            "2010"         
#> [161] "carrig"        "et"            "al"            "2015"         
#> [165] "with"          "validation"    "data"          "one"          
#> [169] "can"           "model"         "the"           "relationship" 
#> [173] "between"       "the"           "error"         "prone"        
#> [177] "and"           "true"          "values"        "and"          
#> [181] "use"           "the"           "model"         "to"           
#> [185] "replace"       "the"           "error"         "prone"        
#> [189] "items"         "with"          "multiply"      "imputed"      
#> [193] "plausible"     "true"          "values"        "reiter"       
#> [197] "2008"          "siddique"      "et"            "al"           
#> [201] "2015"          "in"            "many"          "settings"     
#> [205] "however"       "it"            "is"            "not"          
#> [209] "possible"      "to"            "obtain"        "validation"   
#> [213] "samples"       "e"             "g"             "because"      
#> [217] "it"            "is"            "too"           "expensive"    
#> [221] "or"            "because"       "someone"       "other"        
#> [225] "than"          "the"           "analyst"       "collected"    
#> [229] "the"           "data"          "in"            "such"         
#> [233] "cases"         "another"       "potential"     "source"       
#> [237] "of"            "information"   "is"            "a"            
#> [241] "separate"      "gold"          "stan"          "dard"         
#> [245] "dataset"       "that"          "includes"      "true"         
#> [249] "or"            "at"            "least"         "very"         
#> [253] "high"          "quality"       "measurements"  "of"           
#> [257] "the"           "items"         "subject"       "to"           
#> [261] "error"         "but"           "not"           "the"          
#> [265] "error"         "prone"         "measurements"  "unlike"       
#> [269] "validation"    "sam"           "ples"          "the"          
#> [273] "gold"          "standard"      "dataset"       "alone"        
#> [277] "does"          "not"           "provide"       "enough"       
#> [281] "information"   "to"            "estimate"      "the"          
#> [285] "relationship"  "between"       "the"           "error"        
#> [289] "prone"         "and"           "true"          "values"       
#> [293] "it"            "only"          "provides"      "information"  
#> [297] "about"         "the"           "distribution"  "of"           
#> [301] "the"           "true"          "values"        "thus"         
#> [305] "analysts"      "are"           "faced"         "with"         
#> [309] "a"             "special"       "case"          "2"            
#> 
#> 
#> [[3]]
#> [[3]][[1]]
#>   [1] "of"             "data"           "fusion"         "rubin"         
#>   [5] "1986"           "moriarity"      "and"            "scheuren"      
#>   [9] "2001"           "rassler"        "2002"           "d’orazio"      
#>  [13] "et"             "al"             "2006"           "reiter"        
#>  [17] "2012"           "fosdick"        "et"             "al"            
#>  [21] "2016"           "i"              "e"              "integrating"   
#>  [25] "information"    "from"           "two"            "databases"     
#>  [29] "with"           "disjoint"       "sets"           "of"            
#>  [33] "individuals"    "and"            "distinct"       "variables"     
#>  [37] "one"            "default"        "approach"       "common"        
#>  [41] "in"             "other"          "data"           "fusion"        
#>  [45] "contexts"       "is"             "to"             "assume"        
#>  [49] "that"           "the"            "error"          "prone"         
#>  [53] "and"            "true"           "values"         "are"           
#>  [57] "conditionally"  "independent"    "given"          "some"          
#>  [61] "set"            "of"             "variables"      "x"             
#>  [65] "common"         "to"             "both"           "the"           
#>  [69] "survey"         "and"            "gold"           "standard"      
#>  [73] "data"           "effectively"    "this"           "involves"      
#>  [77] "using"          "the"            "gold"           "standard"      
#>  [81] "data"           "to"             "estimate"       "a"             
#>  [85] "predictive"     "model"          "for"            "the"           
#>  [89] "true"           "values"         "from"           "x"             
#>  [93] "and"            "applying"       "the"            "estimated"     
#>  [97] "model"          "to"             "impute"         "replacements"  
#> [101] "for"            "all"            "values"         "of"            
#> [105] "the"            "error"          "prone"          "items"         
#> [109] "in"             "the"            "survey"         "however"       
#> [113] "this"           "conditional"    "independence"   "assumption"    
#> [117] "completely"     "disregards"     "the"            "information"   
#> [121] "in"             "the"            "error"          "prone"         
#> [125] "values"         "which"          "sacrifices"     "potentially"   
#> [129] "useful"         "information"    "for"            "example"       
#> [133] "consider"       "national"       "surveys"        "that"          
#> [137] "ask"            "people"         "to"             "report"        
#> [141] "their"          "educational"    "attainment"     "we"            
#> [145] "might"          "expect"         "most"           "people"        
#> [149] "to"             "report"         "values"         "accurately"    
#> [153] "and"            "only"           "a"              "modest"        
#> [157] "fraction"       "to"             "make"           "errors"        
#> [161] "it"             "does"           "not"            "make"          
#> [165] "sense"          "to"             "alter"          "every"         
#> [169] "individual’s"   "reported"       "values"         "in"            
#> [173] "the"            "survey"         "as"             "would"         
#> [177] "be"             "done"           "using"          "a"             
#> [181] "conditional"    "independence"   "approach"       "in"            
#> [185] "this"           "article"        "we"             "develop"       
#> [189] "a"              "framework"      "for"            "leveraging"    
#> [193] "information"    "from"           "gold"           "stan"          
#> [197] "dard"           "data"           "to"             "improve"       
#> [201] "inferences"     "in"             "surveys"        "subject"       
#> [205] "to"             "measurement"    "errors"         "the"           
#> [209] "basic"          "idea"           "is"             "to"            
#> [213] "encode"         "plausible"      "assumptions"    "about"         
#> [217] "the"            "error"          "process"        "e"             
#> [221] "g"              "most"           "people"         "do"            
#> [225] "not"            "make"           "errors"         "when"          
#> [229] "reporting"      "educational"    "attainments"    "and"           
#> [233] "the"            "reporting"      "process"        "e"             
#> [237] "g"              "when"           "people"         "make"          
#> [241] "errors"         "they"           "are"            "more"          
#> [245] "likely"         "to"             "report"         "higher"        
#> [249] "attainments"    "than"           "actual"         "into"          
#> [253] "statistical"    "models"         "we"             "couple"        
#> [257] "those"          "models"         "with"           "distributions" 
#> [261] "for"            "the"            "un"             "derlying"      
#> [265] "true"           "data"           "values"         "and"           
#> [269] "use"            "multiple"       "imputation"     "to"            
#> [273] "create"         "plausible"      "corrections"    "to"            
#> [277] "the"            "error"          "prone"          "survey"        
#> [281] "values"         "which"          "then"           "can"           
#> [285] "be"             "analyzed"       "using"          "the"           
#> [289] "methods"        "from"           "ru"             "bin"           
#> [293] "1987"           "this"           "allows"         "us"            
#> [297] "to"             "avoid"          "unrealistic"    "conditional"   
#> [301] "independence"   "assumptions"    "in"             "lieu"          
#> [305] "of"             "more"           "scientifically" "defensible"    
#> [309] "models"         "the"            "remainder"      "of"            
#> [313] "this"           "article"        "is"             "organized"     
#> [317] "as"             "follows"        "in"             "section"       
#> [321] "2"              "we"             "review"         "an"            
#> [325] "3"             
#> 
#> 
#> [[4]]
#> [[4]][[1]]
#>   [1] "example"        "of"             "misreporting"   "of"            
#>   [5] "educational"    "attainment"     "in"             "data"          
#>   [9] "collected"      "by"             "the"            "census"        
#>  [13] "bureau"         "so"             "as"             "to"            
#>  [17] "motivate"       "the"            "methodological" "developments"  
#>  [21] "in"             "section"        "3"              "we"            
#>  [25] "intro"          "duce"           "the"            "general"       
#>  [29] "framework"      "for"            "specifying"     "measurement"   
#>  [33] "error"          "models"         "to"             "leverage"      
#>  [37] "the"            "information"    "in"             "gold"          
#>  [41] "standard"       "data"           "in"             "section"       
#>  [45] "4"              "we"             "apply"          "the"           
#>  [49] "framework"      "to"             "handle"         "po"            
#>  [53] "tential"        "measurement"    "error"          "in"            
#>  [57] "educational"    "attainment"     "in"             "the"           
#>  [61] "2010"           "american"       "community"      "survey"        
#>  [65] "acs"            "using"          "the"            "2010"          
#>  [69] "national"       "survey"         "of"             "college"       
#>  [73] "graduates"      "nscg"           "as"             "a"             
#>  [77] "gold"           "standard"       "file"           "in"            
#>  [81] "doing"          "so"             "we"             "deal"          
#>  [85] "with"           "a"              "key"            "complication"  
#>  [89] "in"             "the"            "data"           "integration"   
#>  [93] "accounting"     "for"            "the"            "informative"   
#>  [97] "sampling"       "design"         "used"           "to"            
#> [101] "sample"         "the"            "nscg"           "we"            
#> [105] "also"           "demonstrate"    "how"            "the"           
#> [109] "framework"      "facilitates"    "analysis"       "of"            
#> [113] "the"            "sensitivity"    "of"             "conclusions"   
#> [117] "to"             "different"      "measurement"    "error"         
#> [121] "model"          "specifications" "in"             "section"       
#> [125] "5"              "we"             "provide"        "a"             
#> [129] "brief"          "summary"        "2"              "misreporting"  
#> [133] "in"             "educational"    "attainment"     "to"            
#> [137] "illustrate"     "the"            "potential"      "for"           
#> [141] "reporting"      "errors"         "in"             "educational"   
#> [145] "attainment"     "that"           "can"            "arise"         
#> [149] "in"             "surveys"        "we"             "examine"       
#> [153] "data"           "from"           "the"            "1993"          
#> [157] "nscg"           "the"            "1993"           "nscg"          
#> [161] "surveyed"       "individuals"    "who"            "indicated"     
#> [165] "on"             "the"            "1990"           "census"        
#> [169] "long"           "form"           "that"           "they"          
#> [173] "had"            "at"             "least"          "a"             
#> [177] "college"        "degree"         "fesco"          "et"            
#> [181] "al"             "2012"           "the"            "questionnaire" 
#> [185] "asked"          "about"          "educational"    "attainment"    
#> [189] "including"      "detailed"       "questions"      "about"         
#> [193] "educational"    "histories"      "these"          "questions"     
#> [197] "greatly"        "reduce"         "the"            "possibility"   
#> [201] "of"             "respondent"     "error"          "so"            
#> [205] "that"           "the"            "educational"    "attainment"    
#> [209] "values"         "in"             "the"            "nscg"          
#> [213] "can"            "be"             "considered"     "a"             
#> [217] "gold"           "standard"       "black"          "et"            
#> [221] "al"             "2003"           "the"            "census"        
#> [225] "long"           "form"           "in"             "contrast"      
#> [229] "did"            "not"            "include"        "detailed"      
#> [233] "follow"         "up"             "questions"      "so"            
#> [237] "that"           "reported"       "educational"    "attainment"    
#> [241] "is"             "prone"          "to"             "measurement"   
#> [245] "error"          "the"            "census"         "bureau"        
#> [249] "linked"         "each"           "individual"     "in"            
#> [253] "the"            "nscg"           "to"             "their"         
#> [257] "corresponding"  "record"         "in"             "the"           
#> [261] "long"           "form"           "data"           "the"           
#> [265] "linked"         "file"           "is"             "available"     
#> [269] "for"            "download"       "from"           "the"           
#> [273] "inter"          "university"     "consortium"     "for"           
#> [277] "political"      "and"            "social"         "research"      
#> [281] "national"       "science"        "foundation"     "4"             
#> 
#> 
#> [[5]]
#> [[5]][[1]]
#>   [1] "table"        "1"            "unweighted"   "cross"        "tabulation"  
#>   [6] "of"           "reported"     "education"    "in"           "the"         
#>  [11] "nscg"         "and"          "census"       "long"         "form"        
#>  [16] "from"         "the"          "linked"       "dataset"      "ba"          
#>  [21] "stands"       "for"          "bachelor’s"   "degree"       "ma"          
#>  [26] "stands"       "for"          "master’s"     "degree"       "prof"        
#>  [31] "stands"       "for"          "professional" "degree"       "and"         
#>  [36] "phd"          "stands"       "for"          "ph"           "d"           
#>  [41] "degree"       "the"          "14,319"       "individuals"  "in"          
#>  [46] "the"          "group"        "labeled"      "no"           "degree"      
#>  [51] "did"          "not"          "have"         "a"            "college"     
#>  [56] "degree"       "despite"      "reporting"    "otherwise"    "the"         
#>  [61] "51,396"       "individuals"  "in"           "the"          "group"       
#>  [66] "labeled"      "other"        "did"          "not"          "have"        
#>  [71] "one"          "of"           "ba"           "ma"           "prof"        
#>  [76] "phd"          "and"          "are"          "discarded"    "from"        
#>  [81] "subsequent"   "analyses"     "census"       "reported"     "education"   
#>  [86] "z"            "ba"           "ma"           "prof"         "phd"         
#>  [91] "total"        "ba"           "89580"        "4109"         "1241"        
#>  [96] "249"          "95179"        "nscg"         "ma"           "1218"        
#> [101] "33928"        "655"          "526"          "36327"        "reported"    
#> [106] "prof"         "382"          "359"          "8648"         "563"         
#> [111] "9952"         "education"    "phd"          "99"           "193"         
#> [116] "452"          "6726"         "7470"         "total"        "91279"       
#> [121] "38589"        "10996"        "8064"         "148928"       "no"          
#> [126] "degree"       "10150"        "1792"         "2040"         "337"         
#> [131] "14319"        "other"        "33368"        "10912"        "4710"        
#> [136] "2406"         "51396"        "1993"         "because"      "of"          
#> [141] "the"          "linkages"     "we"           "can"          "characterize"
#> [146] "the"          "actual"       "measurement"  "error"        "mechanism"   
#> [151] "for"          "educational"  "attainment"   "in"           "the"         
#> [156] "1990"         "long"         "form"         "data"         "in"          
#> [161] "the"          "nscg"         "we"           "treat"        "the"         
#> [166] "highest"      "degree"       "of"           "the"          "three"       
#> [171] "most"         "recent"       "degrees"      "reported"     "coded"       
#> [176] "as"           "ed6c1"        "ed6c2"        "and"          "ed6c3"       
#> [181] "in"           "the"          "file"         "as"           "the"         
#> [186] "true"         "education"    "level"        "we"           "disregard"   
#> [191] "any"          "degrees"      "earned"       "in"           "the"         
#> [196] "years"        "1990"         "1993"         "as"           "these"       
#> [201] "occur"        "in"           "the"          "three"        "year"        
#> [206] "gap"          "between"      "collection"   "of"           "the"         
#> [211] "long"         "form"         "and"          "nscg"         "data"        
#> [216] "this"         "ensures"      "consistent"   "time"         "frames"      
#> [221] "for"          "the"          "nscg"         "and"          "long"        
#> [226] "form"         "reported"     "values"       "we"           "cross"       
#> [231] "tabulate"     "these"        "degrees"      "with"         "the"         
#> [236] "degrees"      "reported"     "in"           "the"          "long"        
#> [241] "form"         "data"         "coded"        "yearsch"      "in"          
#> [246] "the"          "file"         "table"        "1"            "displays"    
#> [251] "the"          "cross"        "tabulation"   "a"            "similar"     
#> [256] "analysis"     "was"          "done"         "by"           "black"       
#> [261] "et"           "al"           "2003"         "as"           "evident"     
#> [266] "in"           "table"        "1"            "reported"     "education"   
#> [271] "levels"       "on"           "the"          "long"         "form"        
#> [276] "often"        "are"          "higher"       "than"         "those"       
#> [281] "on"           "the"          "nscg"         "particularly" "for"         
#> [286] "individuals"  "with"         "only"         "a"            "bachelor’s"  
#> [291] "degree"       "of"           "the"          "163,247"      "individuals" 
#> [296] "in"           "scope"        "in"           "the"          "nscg"        
#> [301] "over"         "14,000"       "were"         "determined"   "not"         
#> [306] "to"           "have"         "at"           "least"        "a"           
#> [311] "bachelor’s"   "degree"       "when"         "asked"        "in"          
#> [316] "the"          "nscg"         "despite"      "reporting"    "otherwise"   
#> [321] "5"           
#> 
#> 
#> [[6]]
#> [[6]][[1]]
#>   [1] "in"              "the"             "long"            "form"           
#>   [5] "a"               "whopping"        "33"              "of"             
#>   [9] "individuals"     "who"             "reported"        "being"          
#>  [13] "professionals"   "in"              "the"             "long"           
#>  [17] "form"            "actually"        "are"             "not"            
#>  [21] "professionals"   "according"       "to"              "the"            
#>  [25] "nscg"            "one"             "possible"        "explanation"    
#>  [29] "for"             "this"            "error"           "is"             
#>  [33] "confusion"       "over"            "the"             "definition"     
#>  [37] "of"              "professionals"   "the"             "census"         
#>  [41] "bureau"          "intended"        "the"             "category"       
#>  [45] "to"              "capture"         "graduate"        "degrees"        
#>  [49] "from"            "universities"    "e"               "g"              
#>  [53] "j"               "d"               "m"               "b"              
#>  [57] "a"               "m"               "d"               "whereas"        
#>  [61] "black"           "et"              "al"              "2003"           
#>  [65] "found"           "that"            "individuals"     "in"             
#>  [69] "professions"     "such"            "as"              "cosmetology"    
#>  [73] "nursing"         "and"             "health"          "services"       
#>  [77] "which"           "require"         "certifications"  "but"            
#>  [81] "not"             "graduate"        "degrees"         "selected"       
#>  [85] "the"             "category"        "in"              "spite"          
#>  [89] "of"              "the"             "nontrivial"      "reporting"      
#>  [93] "error"           "the"             "overwhelming"    "majority"       
#>  [97] "of"              "individuals"     "reported"        "education"      
#> [101] "levels"          "are"             "consistent"      "in"             
#> [105] "the"             "long"            "form"            "and"            
#> [109] "in"              "the"             "nscg"            "of"             
#> [113] "the"             "individuals"     "in"              "the"            
#> [117] "nscg"            "who"             "had"             "at"             
#> [121] "least"           "a"               "college"         "degree"         
#> [125] "at"              "the"             "time"            "of"             
#> [129] "the"             "1990"            "census"          "about"          
#> [133] "93.3"            "of"              "them"            "have"           
#> [137] "the"             "same"            "contemporaneous" "education"      
#> [141] "levels"          "in"              "both"            "files"          
#> [145] "this"            "suggests"        "that"            "most"           
#> [149] "people"          "report"          "correctly"       "an"             
#> [153] "observation"     "we"              "want"            "to"             
#> [157] "leverage"        "when"            "constructing"    "measurement"    
#> [161] "error"           "models"          "for"             "education"      
#> [165] "in"              "the"             "2010"            "acs"            
#> [169] "in"              "most"            "situations"      "we"             
#> [173] "do"              "not"             "have"            "the"            
#> [177] "good"            "fortune"         "of"              "observing"      
#> [181] "individuals"     "error"           "prone"           "and"            
#> [185] "true"            "values"          "simultaneously"  "instead"        
#> [189] "we"              "are"             "in"              "the"            
#> [193] "setting"         "represented"     "by"              "figure"         
#> [197] "1"               "this"            "is"              "also"           
#> [201] "the"             "case"            "in"              "our"            
#> [205] "analysis"        "of"              "educational"     "attainments"    
#> [209] "in"              "the"             "2010"            "acs"            
#> [213] "described"       "in"              "section"         "4"              
#> [217] "the"             "sampling"        "frame"           "for"            
#> [221] "the"             "2010"            "nscg"            "is"             
#> [225] "constructed"     "from"            "reported"        "education"      
#> [229] "levels"          "in"              "the"             "acs"            
#> [233] "which"           "replaced"        "the"             "long"           
#> [237] "form"            "after"           "the"             "2000"           
#> [241] "census"          "however"         "unlike"          "in"             
#> [245] "1993"            "linked"          "data"            "are"            
#> [249] "not"             "available"       "as"              "public"         
#> [253] "use"             "files"           "therefore"       "we"             
#> [257] "treat"           "the"             "2010"            "nscg"           
#> [261] "as"              "gold"            "standard"        "data"           
#> [265] "and"             "posit"           "measurement"     "models"         
#> [269] "that"            "connect"         "the"             "information"    
#> [273] "from"            "the"             "two"             "data"           
#> [277] "sources"         "using"           "the"             "framework"      
#> [281] "that"            "we"              "now"             "describe"       
#> [285] "6"              
#> 
#> 
#> [[7]]
#> [[7]][[1]]
#>   [1] "x"              "y"              "z"              "de"            
#>   [5] "x"              "x"              "dg"             "x"             
#>   [9] "x"              "figure"         "1"              "graphical"     
#>  [13] "representation" "of"             "data"           "fusion"        
#>  [17] "set"            "up"             "in"             "the"           
#>  [21] "survey"         "data"           "de"             "we"            
#>  [25] "only"           "observe"        "the"            "error"         
#>  [29] "prone"          "measurement"    "z"              "but"           
#>  [33] "not"            "the"            "true"           "value"         
#>  [37] "y"              "in"             "the"            "gold"          
#>  [41] "standard"       "data"           "dg"             "we"            
#>  [45] "only"           "observe"        "y"              "but"           
#>  [49] "not"            "z"              "we"             "observe"       
#>  [53] "variables"      "x"              "in"             "both"          
#>  [57] "samples"        "3"              "measurement"    "error"         
#>  [61] "modeling"       "via"            "data"           "fusion"        
#>  [65] "as"             "in"             "figure"         "1"             
#>  [69] "let"            "de"             "and"            "dg"            
#>  [73] "be"             "two"            "data"           "sources"       
#>  [77] "comprising"     "distinct"       "individuals"    "with"          
#>  [81] "sample"         "sizes"          "ne"             "and"           
#>  [85] "ng"             "respectively"   "for"            "each"          
#>  [89] "individual"     "i"              "in"             "dg"            
#>  [93] "or"             "de"             "let"            "xi"            
#>  [97] "xi1"            "xip"            "be"             "variables"     
#> [101] "common"         "to"             "both"           "surveys"       
#> [105] "such"           "as"             "demographic"    "variables"     
#> [109] "we"             "assume"         "these"          "variables"     
#> [113] "have"           "been"           "harmonized"     "d’orazio"      
#> [117] "et"             "al"             "2006"           "across"        
#> [121] "dg"             "and"            "de"             "and"           
#> [125] "are"            "free"           "of"             "errors"        
#> [129] "let"            "y"              "represent"      "the"           
#> [133] "error"          "free"           "values"         "of"            
#> [137] "some"           "variable"       "of"             "interest"      
#> [141] "and"            "let"            "z"              "be"            
#> [145] "an"             "error"          "prone"          "version"       
#> [149] "of"             "y"              "we"             "observe"       
#> [153] "z"              "but"            "not"            "y"             
#> [157] "for"            "the"            "ne"             "individuals"   
#> [161] "in"             "de"             "we"             "observe"       
#> [165] "y"              "but"            "not"            "z"             
#> [169] "for"            "the"            "ng"             "individuals"   
#> [173] "in"             "dg"             "for"            "simplicity"    
#> [177] "of"             "notation"       "we"             "assume"        
#> [181] "no"             "missing"        "values"         "in"            
#> [185] "any"            "variable"       "although"       "the"           
#> [189] "multiple"       "imputation"     "framework"      "easily"        
#> [193] "handles"        "missing"        "values"         "additionally"  
#> [197] "de"             "can"            "include"        "variables"     
#> [201] "for"            "which"          "there"          "is"            
#> [205] "no"             "corresponding"  "variable"       "in"            
#> [209] "dg"             "these"          "variables"      "do"            
#> [213] "not"            "play"           "a"              "role"          
#> [217] "in"             "the"            "measurement"    "error"         
#> [221] "modeling"       "although"       "they"           "can"           
#> [225] "be"             "used"           "in"             "multiple"      
#> [229] "imputation"     "inferences"     "we"             "seek"          
#> [233] "to"             "estimate"       "pr"             "y"             
#> [237] "z"              "x"              "and"            "use"           
#> [241] "it"             "to"             "create"         "multiple"      
#> [245] "imputations"    "for"            "the"            "missing"       
#> [249] "values"         "in"             "y"              "for"           
#> [253] "the"            "individuals"    "in"             "de"            
#> [257] "we"             "do"             "so"             "for"           
#> [261] "the"            "common"         "setting"        "where"         
#> [265] "x"              "y"              "z"              "are"           
#> [269] "all"            "categorical"    "variables"      "similar"       
#> [273] "ideas"          "apply"          "for"            "other"         
#> [277] "data"           "types"          "for"            "j"             
#> [281] "1"              "p"              "let"            "each"          
#> [285] "xj"             "have"           "dj"             "levels"        
#> [289] "let"            "z"              "have"           "dz"            
#> [293] "levels"         "and"            "y"              "have"          
#> [297] "dy"             "7"             
#> 
#> 
#> [[8]]
#> [[8]][[1]]
#>   [1] "levels"        "typically"     "dz"            "dy"           
#>   [5] "but"           "this"          "need"          "not"          
#>   [9] "be"            "the"           "case"          "generally"    
#>  [13] "for"           "example"       "in"            "the"          
#>  [17] "nscg"          "acs"           "application"   "z"            
#>  [21] "is"            "the"           "educational"   "attainment"   
#>  [25] "among"         "those"         "who"           "report"       
#>  [29] "a"             "college"       "degree"        "in"           
#>  [33] "the"           "acs"           "which"         "has"          
#>  [37] "dz"            "4"             "levels"        "bachelor’s"   
#>  [41] "degree"        "master’s"      "degree"        "professional" 
#>  [45] "degree"        "or"            "ph"            "d"            
#>  [49] "degree"        "and"           "y"             "is"           
#>  [53] "the"           "educational"   "attainment"    "in"           
#>  [57] "the"           "nscg"          "which"         "has"          
#>  [61] "dy"            "5"             "levels"        "an"           
#>  [65] "additional"    "level"         "is"            "needed"       
#>  [69] "because"       "some"          "individuals"   "in"           
#>  [73] "the"           "nscg"          "truly"         "do"           
#>  [77] "not"           "have"          "a"             "college"      
#>  [81] "degree"        "for"           "all"           "i"            
#>  [85] "de"            "let"           "ei"            "be"           
#>  [89] "an"            "unobserved"    "indicator"     "of"           
#>  [93] "a"             "reporting"     "error"         "that"         
#>  [97] "is"            "ei"            "1"             "when"         
#> [101] "yi"            "6"             "zi"            "and"          
#> [105] "ei"            "0"             "otherwise"     "using"        
#> [109] "e"             "enables"       "us"            "to"           
#> [113] "write"         "pr"            "y"             "z"            
#> [117] "x"             "as"            "a"             "product"      
#> [121] "of"            "three"         "sub"           "models"       
#> [125] "for"           "individual"    "i"             "the"          
#> [129] "full"          "data"          "likelihood"    "omitting"     
#> [133] "parameters"    "for"           "simplicity"    "can"          
#> [137] "be"            "factored"      "as"            "pr"           
#> [141] "yi"            "k"             "zi"            "l"            
#> [145] "xi"            "pr"            "yi"            "k"            
#> [149] "xi"            "pr"            "ei"            "e"            
#> [153] "yi"            "k"             "xi"            "pr"           
#> [157] "zi"            "l"             "ei"            "e"            
#> [161] "yi"            "k"             "xi"            "1"            
#> [165] "this"          "separates"     "the"           "true"         
#> [169] "data"          "generation"    "process"       "and"          
#> [173] "the"           "measurement"   "error"         "generation"   
#> [177] "process"       "which"         "facilitates"   "model"        
#> [181] "specification" "in"            "particular"    "we"           
#> [185] "can"           "use"           "dg"            "to"           
#> [189] "estimate"      "the"           "true"          "data"         
#> [193] "distribution"  "pr"            "y"             "x"            
#> [197] "we"            "then"          "can"           "posit"        
#> [201] "different"     "models"        "for"           "the"          
#> [205] "rates"         "of"            "making"        "errors"       
#> [209] "pr"            "ei"            "e"             "yi"           
#> [213] "k"             "xi"            "and"           "for"          
#> [217] "the"           "reported"      "values"        "when"         
#> [221] "errors"        "are"           "made"          "pr"           
#> [225] "zi"            "l"             "ei"            "1"            
#> [229] "yi"            "k"             "xi"            "intuitively"  
#> [233] "the"           "error"         "model"         "locates"      
#> [237] "the"           "records"       "for"           "which"        
#> [241] "yi"            "6"             "zi"            "and"          
#> [245] "the"           "reporting"     "model"         "captures"     
#> [249] "the"           "patterns"      "of"            "misreported"  
#> [253] "zi"            "of"            "course"        "when"         
#> [257] "ei"            "0"             "pr"            "zi"           
#> [261] "yi"            "1"             "a"             "similar"      
#> [265] "factorization" "is"            "used"          "by"           
#> [269] "yucel"         "and"           "zaslavsky"     "2005"         
#> [273] "he"            "et"            "al"            "2014"         
#> [277] "kim"           "et"            "al"            "2015"         
#> [281] "and"           "manrique"      "vallier"       "and"          
#> [285] "reiter"        "2016"          "among"         "others"       
#> [289] "by"            "construction"  "dg"            "and"          
#> [293] "de"            "cannot"        "be"            "used"         
#> [297] "to"            "estimate"      "any"           "of"           
#> [301] "the"           "conditional"   "probabilities" "pr"           
#> [305] "y"             "z"             "x"             "directly"     
#> [309] "hence"         "we"            "have"          "to"           
#> [313] "restrict"      "the"           "number"        "and"          
#> [317] "types"         "of"            "parameters"    "in"           
#> [321] "the"           "sub"           "models"        "in"           
#> [325] "1"             "put"           "another"       "way"          
#> [329] "if"            "we"            "tried"         "to"           
#> [333] "estimate"      "a"             "fully"         "8"            
#> 
#> 
#> [[9]]
#> [[9]][[1]]
#>   [1] "saturated"      "model"          "for"            "e"             
#>   [5] "z"              "x"              "we"             "would"         
#>   [9] "not"            "be"             "able"           "to"            
#>  [13] "identify"       "all"            "the"            "parameters"    
#>  [17] "by"             "using"          "dg"             "and"           
#>  [21] "de"             "alone"          "to"             "see"           
#>  [25] "this"           "assume"         "for"            "the"           
#>  [29] "moment"         "that"           "all"            "dx"            
#>  [33] "πpj"            "1"              "dj"             "possible"      
#>  [37] "combinations"   "of"             "x"              "are"           
#>  [41] "present"        "in"             "dg"             "and"           
#>  [45] "de"             "to"             "estimate"       "the"           
#>  [49] "distribution"   "of"             "e"              "z"             
#>  [53] "x"              "using"          "a"              "fully"         
#>  [57] "saturated"      "model"          "we"             "require"       
#>  [61] "dy"             "1"              "dx"             "dz"            
#>  [65] "1"              "dy"             "dx"             "dy"            
#>  [69] "dz"             "1"              "dx"             "independent"   
#>  [73] "pieces"         "of"             "information"    "from"          
#>  [77] "dg"             "de"             "where"          "each"          
#>  [81] "subtraction"    "of"             "one"            "derives"       
#>  [85] "from"           "the"            "requirement"    "that"          
#>  [89] "probabilities"  "sum"            "to"             "one"           
#>  [93] "however"        "dg"             "and"            "de"            
#>  [97] "together"       "provide"        "only"           "dz"            
#> [101] "1"              "dx"             "dy"             "1"             
#> [105] "dx"             "dx"             "dz"             "dy"            
#> [109] "1"              "dx"             "independent"    "pieces"        
#> [113] "of"             "information"    "where"          "we"            
#> [117] "add"            "a"              "dx"             "to"            
#> [121] "properly"       "account"        "for"            "the"           
#> [125] "sum"            "to"             "one"            "constraint"    
#> [129] "a"              "key"            "insight"        "here"          
#> [133] "is"             "that"           "since"          "the"           
#> [137] "true"           "data"           "model"          "requires"      
#> [141] "dy"             "dx"             "parameters"     "to"            
#> [145] "estimate"       "the"            "joint"          "distribution"  
#> [149] "for"            "y"              "x"              "the"           
#> [153] "data"           "can"            "identify"       "at"            
#> [157] "most"           "dz"             "1"              "dx"            
#> [161] "parameters"     "in"             "the"            "error"         
#> [165] "and"            "reporting"      "models"         "combined"      
#> [169] "related"        "identification" "issues"         "arise"         
#> [173] "in"             "the"            "context"        "of"            
#> [177] "refreshment"    "sampling"       "to"             "adjust"        
#> [181] "for"            "nonignorable"   "attrition"      "in"            
#> [185] "longitudinal"   "studies"        "hirano"         "et"            
#> [189] "al"             "2001"           "schifeling"     "et"            
#> [193] "al"             "2015"           "si"             "et"            
#> [197] "al"             "2015"           "3.1"            "true"          
#> [201] "data"           "model"          "pr"             "yi"            
#> [205] "k"              "xi"             "one"            "can"           
#> [209] "use"            "any"            "model"          "for"           
#> [213] "y"              "x"              "that"           "adequately"    
#> [217] "describes"      "the"            "conditional"    "distri"        
#> [221] "bution"         "such"           "as"             "a"             
#> [225] "multinomial"    "logistic"       "regression"     "in"            
#> [229] "the"            "nscg"           "acs"            "application"   
#> [233] "we"             "use"            "a"              "fully"         
#> [237] "saturated"      "multinomial"    "model"          "accounting"    
#> [241] "for"            "the"            "informative"    "sampling"      
#> [245] "design"         "in"             "dg"             "using"         
#> [249] "the"            "approach"       "described"      "in"            
#> [253] "section"        "4.1"            "one"            "also"          
#> [257] "could"          "use"            "a"              "joint"         
#> [261] "distribution"   "for"            "y"              "x"             
#> [265] "such"           "as"             "a"              "log"           
#> [269] "linear"         "model"          "or"             "a"             
#> [273] "mixture"        "of"             "multinomials"   "model"         
#> [277] "dunson"         "and"            "xing"           "2009"          
#> [281] "si"             "and"            "reiter"         "2013"          
#> [285] "9"             
#> 
#> 
#> [[10]]
#> [[10]][[1]]
#>   [1] "3.2"           "error"         "model"         "pr"           
#>   [5] "ei"            "1"             "yi"            "xi"           
#>   [9] "in"            "cases"         "where"         "dy"           
#>  [13] "dz"            "a"             "generic"       "form"         
#>  [17] "for"           "the"           "error"         "model"        
#>  [21] "is"            "pr"            "ei"            "1"            
#>  [25] "xi"            "yi"            "k"             "g"            
#>  [29] "xi"            "yi"            "β"             "2"            
#>  [33] "where"         "g"             "xi"            "yi"           
#>  [37] "β"             "is"            "some"          "function"     
#>  [41] "of"            "its"           "arguments"     "and"          
#>  [45] "β"             "is"            "some"          "set"          
#>  [49] "of"            "unknown"       "parameters"    "a"            
#>  [53] "convenient"    "class"         "of"            "functions"    
#>  [57] "that"          "we"            "use"           "here"         
#>  [61] "is"            "the"           "logistic"      "regression"   
#>  [65] "of"            "ei"            "on"            "some"         
#>  [69] "design"        "vector"        "mi"            "derived"      
#>  [73] "from"          "xi"            "yi"            "with"         
#>  [77] "corresponding" "coefficients"  "β"             "the"          
#>  [81] "analyst"       "can"           "encode"        "different"    
#>  [85] "versions"      "of"            "mi"            "to"           
#>  [89] "represent"     "assumptions"   "about"         "the"          
#>  [93] "error"         "process"       "the"           "simplest"     
#>  [97] "specification" "is"            "to"            "set"          
#> [101] "each"          "mi"            "equal"         "to"           
#> [105] "a"             "vector"        "of"            "ones"         
#> [109] "which"         "implies"       "that"          "there"        
#> [113] "is"            "a"             "common"        "probability"  
#> [117] "of"            "error"         "for"           "all"          
#> [121] "individuals"   "this"          "error"         "model"        
#> [125] "makes"         "sense"         "when"          "the"          
#> [129] "analyst"       "believes"      "the"           "errors"       
#> [133] "in"            "z"             "occur"         "completely"   
#> [137] "at"            "random"        "for"           "example"      
#> [141] "when"          "errors"        "arise"         "simply"       
#> [145] "because"       "respondents"   "accidentally"  "and"          
#> [149] "randomly"      "select"        "the"           "wrong"        
#> [153] "response"      "in"            "the"           "survey"       
#> [157] "or"            "when"          "all"           "respondents"  
#> [161] "are"           "equally"       "likely"        "to"           
#> [165] "misunderstand" "the"           "survey"        "question"     
#> [169] "a"             "more"          "realistic"     "possibility"  
#> [173] "is"            "to"            "allow"         "the"          
#> [177] "probability"   "of"            "error"         "to"           
#> [181] "depend"        "on"            "some"          "variables"    
#> [185] "in"            "xi"            "but"           "not"          
#> [189] "on"            "yi"            "e"             "g"            
#> [193] "men"           "misreport"     "education"     "at"           
#> [197] "different"     "rates"         "than"          "women"        
#> [201] "this"          "could"         "be"            "encoded"      
#> [205] "by"            "including"     "an"            "intercept"    
#> [209] "for"           "one"           "of"            "the"          
#> [213] "sexes"         "in"            "mi"            "finally"      
#> [217] "one"           "can"           "allow"         "the"          
#> [221] "probability"   "of"            "error"         "to"           
#> [225] "depend"        "on"            "yi"            "itself"       
#> [229] "for"           "example"       "people"        "who"          
#> [233] "truly"         "do"            "not"           "have"         
#> [237] "at"            "least"         "a"             "college"      
#> [241] "degree"        "are"           "more"          "likely"       
#> [245] "to"            "misreport"     "by"            "including"    
#> [249] "some"          "function"      "of"            "it"           
#> [253] "in"            "mi"            "in"            "the"          
#> [257] "case"          "where"         "dz"            "6"            
#> [261] "dy"            "as"            "in"            "the"          
#> [265] "nscg"          "acs"           "application"   "we"           
#> [269] "automatically" "set"           "ei"            "1"            
#> [273] "for"           "any"           "individual"    "with"         
#> [277] "yi"            "1"             "dz"            "for"          
#> [281] "example"       "we"            "set"           "ei"           
#> [285] "1"             "for"           "all"           "individuals"  
#> [289] "who"           "are"           "determined"    "in"           
#> [293] "the"           "nscg"          "not"           "to"           
#> [297] "have"          "a"             "college"       "degree"       
#> [301] "but"           "report"        "so"            "in"           
#> [305] "the"           "acs"           "the"           "stochastic"   
#> [309] "part"          "of"            "the"           "error"        
#> [313] "model"         "only"          "applies"       "to"           
#> [317] "individuals"   "who"           "truly"         "have"         
#> [321] "at"            "least"         "a"             "bachelor’s"   
#> [325] "degree"        "10"           
#> 
#> 
#> [[11]]
#> [[11]][[1]]
#>   [1] "3.3"           "reporting"     "model"         "pr"           
#>   [5] "zi"            "ei"            "1"             "yi"           
#>   [9] "xi"            "when"          "there"         "is"           
#>  [13] "no"            "reporting"     "error"         "for"          
#>  [17] "individual"    "i"             "i"             "e"            
#>  [21] "ei"            "0"             "we"            "know"         
#>  [25] "that"          "zi"            "yi"            "when"         
#>  [29] "there"         "is"            "a"             "reporting"    
#>  [33] "error"         "we"            "must"          "model"        
#>  [37] "the"           "reported"      "value"         "zi"           
#>  [41] "as"            "with"          "2"             "one"          
#>  [45] "can"           "posit"         "a"             "variety"      
#>  [49] "of"            "distributions" "for"           "the"          
#>  [53] "reporting"     "error"         "which"         "is"           
#>  [57] "some"          "function"      "h"             "xi"           
#>  [61] "yi"            "α"             "with"          "parameters"   
#>  [65] "α"             "we"            "now"           "describe"     
#>  [69] "a"             "few"           "reporting"     "error"        
#>  [73] "models"        "for"           "illustration"  "one"          
#>  [77] "could"         "use"           "more"          "complicated"  
#>  [81] "models"        "e"             "g"             "based"        
#>  [85] "on"            "multinomial"   "logistic"      "regression"   
#>  [89] "as"            "well"          "a"             "simple"       
#>  [93] "model"         "assumes"       "that"          "values"       
#>  [97] "of"            "zi"            "are"           "equally"      
#> [101] "likely"        "as"            "in"            "manrique"     
#> [105] "vallier"       "and"           "reiter"        "2016"         
#> [109] "we"            "have"          "1"             "dz"           
#> [113] "1"             "if"            "l"             "6"            
#> [117] "k"             "k"             "1"             "dz"           
#> [121] "pr"            "zi"            "l"             "xi"           
#> [125] "yi"            "k"             "ei"            "1"            
#> [129] "1"             "dz"            "if"            "k"            
#> [133] "1"             "dz"            "3"             "0"            
#> [137] "otherwise"     "such"          "a"             "reporting"    
#> [141] "model"         "could"         "be"            "reasonable"   
#> [145] "when"          "reporting"     "errors"        "are"          
#> [149] "due"           "to"            "clerical"      "errors"       
#> [153] "we"            "note"          "that"          "this"         
#> [157] "model"         "does"          "not"           "accurately"   
#> [161] "characterize"  "the"           "reporting"     "errors"       
#> [165] "in"            "the"           "1993"          "linked"       
#> [169] "nscg"          "data"          "per"           "table"        
#> [173] "1"             "alternatively" "one"           "can"          
#> [177] "allow"         "the"           "probabilities" "to"           
#> [181] "depend"        "on"            "yi"            "so"           
#> [185] "that"          "zi"            "xi"            "yi"           
#> [189] "k"             "ei"            "1"             "categorical"  
#> [193] "pk"            "1"             "pk"            "dz"           
#> [197] "4"             "where"         "each"          "pk"           
#> [201] "l"             "is"            "the"           "probability"  
#> [205] "of"            "reporting"     "z"             "l"            
#> [209] "given"         "that"          "y"             "k"            
#> [213] "and"           "pk"            "k"             "0"            
#> [217] "one"           "can"           "further"       "parameterize" 
#> [221] "the"           "reporting"     "model"         "so"           
#> [225] "that"          "the"           "reporting"     "probabilities"
#> [229] "vary"          "with"          "x"             "for"          
#> [233] "example"       "to"            "make"          "the"          
#> [237] "probabilities" "vary"          "with"          "sex"          
#> [241] "and"           "true"          "education"     "11"           
#> 
#> 
#> [[12]]
#> [[12]][[1]]
#>   [1] "values"         "we"             "can"            "use"           
#>   [5] "categorical"    "pm"             "k"              "1"             
#>   [9] "pm"             "k"              "dz"             "if"            
#>  [13] "xi"             "sex"            "m"              "zi"            
#>  [17] "xi"             "yi"             "k"              "ei"            
#>  [21] "1"              "5"              "categorical"    "p"             
#>  [25] "f"              "k"              "1"              "pf"            
#>  [29] "k"              "dz"             "if"             "xi"            
#>  [33] "sex"            "f"              "3.4"            "specifying"    
#>  [37] "and"            "estimating"     "the"            "model"         
#>  [41] "as"             "apparent"       "in"             "sections"      
#>  [45] "3.2"            "and"            "3.3"            "the"           
#>  [49] "error"          "and"            "reporting"      "models"        
#>  [53] "can"            "take"           "on"             "many"          
#>  [57] "specifications" "without"        "linked"         "data"          
#>  [61] "analysts"       "cannot"         "use"            "exploratory"   
#>  [65] "data"           "analysis"       "to"             "inform"        
#>  [69] "the"            "model"          "choice"         "instead"       
#>  [73] "we"             "recommend"      "that"           "analysts"      
#>  [77] "posit"          "scientifically" "defensible"     "measurement"   
#>  [81] "error"          "models"         "and"            "make"          
#>  [85] "post"           "hoc"            "checks"         "of"            
#>  [89] "the"            "sensibility"    "of"             "analyses"      
#>  [93] "from"           "those"          "models"         "we"            
#>  [97] "demonstrate"    "this"           "approach"       "in"            
#> [101] "section"        "4"              "for"            "example"       
#> [105] "analysts"       "can"            "check"          "whether"       
#> [109] "or"             "not"            "the"            "predicted"     
#> [113] "probabilities"  "of"             "errors"         "implied"       
#> [117] "by"             "the"            "model"          "seem"          
#> [121] "plausible"      "as"             "another"        "diagnostic"    
#> [125] "analysts"       "can"            "compare"        "the"           
#> [129] "distribution"   "of"             "the"            "imputed"       
#> [133] "values"         "of"             "y"              "x"             
#> [137] "in"             "de"             "to"             "the"           
#> [141] "empirical"      "distribution"   "of"             "y"             
#> [145] "x"              "in"             "dg"             "this"          
#> [149] "is"             "akin"           "to"             "diagnostics"   
#> [153] "in"             "multiple"       "imputation"     "for"           
#> [157] "missing"        "data"           "that"           "compare"       
#> [161] "imputed"        "and"            "observed"       "values"        
#> [165] "abayomi"        "et"             "al"             "2008"          
#> [169] "when"           "these"          "distributions"  "differ"        
#> [173] "substantially"  "it"             "suggests"       "the"           
#> [177] "measurement"    "error"          "model"          "specification" 
#> [181] "or"             "possibly"       "the"            "true"          
#> [185] "data"           "model"          "is"             "inadequate"    
#> [189] "such"           "diagnostic"     "checks"         "only"          
#> [193] "can"            "reveal"         "problems"       "with"          
#> [197] "the"            "model"          "specification"  "they"          
#> [201] "do"             "not"            "indicate"       "that"          
#> [205] "a"              "particular"     "specification"  "is"            
#> [209] "correct"        "more"           "generally"      "it"            
#> [213] "is"             "prudent"        "to"             "keep"          
#> [217] "the"            "restrictions"   "on"             "the"           
#> [221] "number"         "of"             "identifiable"   "parameters"    
#> [225] "in"             "mind"           "when"           "specifying"    
#> [229] "the"            "models"         "at"             "most"          
#> [233] "one"            "can"            "identify"       "the"           
#> [237] "equiv"          "alent"          "of"             "dz"            
#> [241] "1"              "dx"             "parameters"     "in"            
#> [245] "the"            "combined"       "model"          "for"           
#> [249] "ei"             "zi"             "xi"             "generally"     
#> [253] "for"            "ease"           "of"             "specification" 
#> [257] "and"            "interpretation" "we"             "favor"         
#> [261] "rich"           "error"          "models"         "e"             
#> [265] "g"              "with"           "mi"             "including"     
#> [269] "variables"      "in"             "xi"             "and"           
#> [273] "yi"             "coupled"        "with"           "simple"        
#> [277] "reporting"      "models"         "like"           "those"         
#> [281] "in"             "section"        "3.3"            "the"           
#> [285] "exact"          "strategy"       "for"            "estimating"    
#> [289] "the"            "model"          "depends"        "on"            
#> [293] "the"            "features"       "of"             "dg"            
#> [297] "and"            "de"             "12"            
#> 
#> 
#> [[13]]
#> [[13]][[1]]
#>   [1] "when"          "both"          "datasets"      "can"          
#>   [5] "be"            "treated"       "as"            "simple"       
#>   [9] "random"        "samples"       "we"            "suggest"      
#>  [13] "using"         "a"             "fully"         "bayesian"     
#>  [17] "approach"      "after"         "concatenating" "dg"           
#>  [21] "and"           "de"            "here"          "one"          
#>  [25] "can"           "use"           "typical"       "prior"        
#>  [29] "distributions" "for"           "the"           "true"         
#>  [33] "data"          "and"           "error"         "models"       
#>  [37] "for"           "reporting"     "models"        "like"         
#>  [41] "those"         "in"            "4"             "and"          
#>  [45] "5"             "it"            "is"            "convenient"   
#>  [49] "to"            "use"           "independent"   "dirichlet"    
#>  [53] "priors"        "for"           "each"          "pk"           
#>  [57] "1"             "pk"            "k"             "1"            
#>  [61] "pk"            "k"             "1"             "pk"           
#>  [65] "dz"            "in"            "the"           "nscg"         
#>  [69] "acs"           "application"   "we"            "create"       
#>  [73] "prior"         "distributions" "for"           "the"          
#>  [77] "reporting"     "models"        "using"         "the"          
#>  [81] "information"   "from"          "table"         "1"            
#>  [85] "absent"        "such"          "information"   "analysts"     
#>  [89] "can"           "use"           "uniform"       "prior"        
#>  [93] "distributions" "when"          "it"            "does"         
#>  [97] "not"           "make"          "sense"         "to"           
#> [101] "concatenate"   "dg"            "and"           "de"           
#> [105] "it"            "can"           "be"            "convenient"   
#> [109] "to"            "use"           "a"             "multi"        
#> [113] "stage"         "estimation"    "strategy"      "when"         
#> [117] "imputing"      "missing"       "y"             "in"           
#> [121] "de"            "all"           "of"            "the"          
#> [125] "information"   "needed"        "from"          "dg"           
#> [129] "is"            "represented"   "by"            "the"          
#> [133] "parameters"    "of"            "the"           "true"         
#> [137] "data"          "model"         "θ"             "hence"        
#> [141] "we"            "first"         "can"           "construct"    
#> [145] "a"             "possibly"      "approximate"   "posterior"    
#> [149] "distribution"  "of"            "θ"             "using"        
#> [153] "only"          "dg"            "we"            "then"         
#> [157] "sample"        "many"          "draws"         "from"         
#> [161] "this"          "distribution"  "we"            "plug"         
#> [165] "these"         "draws"         "in"            "the"          
#> [169] "gibbs"         "sampling"      "steps"         "for"          
#> [173] "a"             "bayesian"      "predictive"    "distribution" 
#> [177] "for"           "yi"            "zi"            "xi"           
#> [181] "θ"             "for"           "the"           "cases"        
#> [185] "in"            "de"            "thereby"       "generating"   
#> [189] "the"           "multiple"      "imputations"   "we"           
#> [193] "describe"      "the"           "gibbs"         "sampler"      
#> [197] "for"           "this"          "step"          "for"          
#> [201] "the"           "nscg"          "acs"           "application"  
#> [205] "in"            "the"           "supplementary" "material"     
#> [209] "4"             "adjusting"     "for"           "reporting"    
#> [213] "errors"        "in"            "education"     "in"           
#> [217] "the"           "2010"          "acs"           "we"           
#> [221] "now"           "use"           "the"           "framework"    
#> [225] "to"            "adjust"        "inferences"    "for"          
#> [229] "potential"     "reporting"     "error"         "in"           
#> [233] "educa"         "tional"        "attainment"    "in"           
#> [237] "the"           "2010"          "acs"           "using"        
#> [241] "the"           "public"        "use"           "microdata"    
#> [245] "for"           "the"           "2010"          "nscg"         
#> [249] "as"            "the"           "gold"          "standard"     
#> [253] "file"          "dg"            "we"            "consider"     
#> [257] "two"           "main"          "analyses"      "that"         
#> [261] "could"         "be"            "affected"      "by"           
#> [265] "reporting"     "error"         "in"            "education"    
#> [269] "first"         "we"            "estimate"      "from"         
#> [273] "the"           "acs"           "the"           "number"       
#> [277] "of"            "science"       "and"           "engineering"  
#> [281] "degrees"       "awarded"       "to"            "women"        
#> [285] "we"            "base"          "the"           "estimate"     
#> [289] "on"            "an"            "indicator"     "in"           
#> [293] "the"           "acs"           "for"           "whether"      
#> [297] "or"            "not"           "each"          "individual"   
#> [301] "has"           "such"          "a"             "degree"       
#> [305] "second"        "we"            "examine"       "13"           
#> 
#> 
#> [[14]]
#> [[14]][[1]]
#>   [1] "average"        "incomes"        "across"         "degrees"       
#>   [5] "this"           "focus"          "is"             "motivated"     
#>   [9] "in"             "part"           "by"             "the"           
#>  [13] "findings"       "of"             "black"          "et"            
#>  [17] "al"             "2006"           "2008"           "who"           
#>  [21] "found"          "that"           "apparent"       "wage"          
#>  [25] "gaps"           "in"             "the"            "1990"          
#>  [29] "census"         "long"           "form"           "data"          
#>  [33] "could"          "be"             "explained"      "by"            
#>  [37] "reporting"      "errors"         "in"             "education"     
#>  [41] "as"             "de"             "we"             "use"           
#>  [45] "the"            "subset"         "of"             "acs"           
#>  [49] "microdata"      "that"           "includes"       "only"          
#>  [53] "individuals"    "who"            "reported"       "a"             
#>  [57] "bachelor’s"     "degree"         "or"             "higher"        
#>  [61] "and"            "are"            "under"          "age"           
#>  [65] "76"             "the"            "resulting"      "sample"        
#>  [69] "size"           "is"             "ne"             "600"           
#>  [73] "150"            "in"             "x"              "we"            
#>  [77] "include"        "gender"         "age"            "group"         
#>  [81] "24"             "and"            "younger"        "25"            
#>  [85] "39"             "40"             "54"             "and"           
#>  [89] "55"             "and"            "older"          "and"           
#>  [93] "an"             "indicator"      "for"            "whether"       
#>  [97] "the"            "individual’s"   "race"           "is"            
#> [101] "black"          "or"             "something"      "else"          
#> [105] "in"             "the"            "nscg"           "we"            
#> [109] "discarded"      "38"             "records"        "with"          
#> [113] "race"           "suppressed"     "leaving"        "a"             
#> [117] "sample"         "size"           "of"             "ng"            
#> [121] "77"             "150"            "we"             "consider"      
#> [125] "two"            "sets"           "of"             "measurement"   
#> [129] "error"          "model"          "specifications" "the"           
#> [133] "first"          "set"            "uses"           "specifications"
#> [137] "like"           "those"          "in"             "section"       
#> [141] "3"              "with"           "flat"           "prior"         
#> [145] "distributions"  "for"            "all"            "parameters"    
#> [149] "we"             "use"            "this"           "set"           
#> [153] "to"             "illustrate"     "model"          "diagnostics"   
#> [157] "and"            "sensitivity"    "analysis"       "absent"        
#> [161] "prior"          "information"    "about"          "the"           
#> [165] "measurement"    "error"          "process"        "the"           
#> [169] "second"         "set"            "uses"           "a"             
#> [173] "common"         "error"          "and"            "reporting"     
#> [177] "model"          "with"           "different"      "informative"   
#> [181] "prior"          "distributions"  "on"             "its"           
#> [185] "parameters"     "we"             "construct"      "these"         
#> [189] "informative"    "prior"          "distributions"  "based"         
#> [193] "on"             "the"            "analysis"       "of"            
#> [197] "the"            "1993"           "linked"         "file"          
#> [201] "for"            "all"            "specifications" "considered"    
#> [205] "we"             "create"         "m"              "50"            
#> [209] "multiple"       "imputations"    "of"             "the"           
#> [213] "plausible"      "true"           "education"      "values"        
#> [217] "in"             "the"            "2010"           "acs"           
#> [221] "which"          "we"             "then"           "analyze"       
#> [225] "using"          "the"            "methods"        "of"            
#> [229] "rubin"          "1987"           "for"            "all"           
#> [233] "specifications" "the"            "true"           "data"          
#> [237] "model"          "is"             "a"              "saturated"     
#> [241] "multinomial"    "distribution"   "for"            "the"           
#> [245] "five"           "values"         "of"             "y"             
#> [249] "for"            "each"           "combination"    "of"            
#> [253] "x"              "we"             "begin"          "by"            
#> [257] "describing"     "how"            "we"             "estimate"      
#> [261] "the"            "parameters"     "of"             "the"           
#> [265] "true"           "data"           "distribution"   "accounting"    
#> [269] "for"            "the"            "informative"    "sampling"      
#> [273] "design"         "of"             "the"            "nscg"          
#> [277] "4.1"            "accounting"     "for"            "informative"   
#> [281] "sampling"       "design"         "of"             "nscg"          
#> [285] "the"            "2010"           "nscg"           "uses"          
#> [289] "reported"       "education"      "in"             "the"           
#> [293] "2010"           "acs"            "as"             "a"             
#> [297] "stratification" "variable"       "fesco"          "et"            
#> [301] "al"             "2012"           "finamore"       "2013"          
#> [305] "its"            "unweighted"     "percentages"    "can"           
#> [309] "over"           "represent"      "14"            
#> 
#> 
#> [[15]]
#> [[15]][[1]]
#>   [1] "or"           "under"        "represent"    "degree"       "types"       
#>   [6] "in"           "the"          "population"   "this"         "is"          
#>  [11] "most"         "obviously"    "the"          "case"         "for"         
#>  [16] "individuals"  "without"      "a"            "college"      "degree"      
#>  [21] "yi"           "5"            "we"           "need"         "to"          
#>  [26] "account"      "for"          "this"         "informative"  "sampling"    
#>  [31] "when"         "estimating"   "parameters"   "of"           "the"         
#>  [36] "true"         "data"         "model"        "we"           "do"          
#>  [41] "so"           "with"         "a"            "two"          "stage"       
#>  [46] "approach"     "first"        "we"           "use"          "survey"      
#>  [51] "weighted"     "inferences"   "to"           "estimate"     "population"  
#>  [56] "totals"       "of"           "y"            "x"            "from"        
#>  [61] "the"          "2010"         "nscg"         "second"       "we"          
#>  [66] "turn"         "these"        "estimates"    "into"         "an"          
#>  [71] "approximate"  "bayesian"     "posterior"    "distribution" "for"         
#>  [76] "input"        "to"           "fitting"      "the"          "measurement" 
#>  [81] "error"        "models"       "used"         "to"           "impute"      
#>  [86] "plausible"    "values"       "of"           "yi"           "for"         
#>  [91] "individuals"  "in"           "the"          "acs"          "we"          
#>  [96] "now"          "describe"     "this"         "process"      "which"       
#> [101] "can"          "be"           "used"         "generally"    "when"        
#> [106] "dg"           "is"           "collected"    "via"          "a"           
#> [111] "complex"      "survey"       "design"       "suppose"      "for"         
#> [116] "the"          "moment"       "that"         "dy"           "dz"          
#> [121] "this"         "is"           "not"          "the"          "case"        
#> [126] "when"         "de"           "is"           "the"          "acs"         
#> [131] "where"        "dz"           "4"            "and"          "dg"          
#> [136] "is"           "the"          "nscg"         "where"        "dy"          
#> [141] "5"            "however"      "we"           "start"        "here"        
#> [146] "to"           "fix"          "ideas"        "for"          "all"         
#> [151] "possible"     "combinations" "x"            "let"          "θxk"         
#> [156] "pr"           "y"            "k"            "x"            "x"           
#> [161] "and"          "let"          "θx"           "θx1"          "θxdy"        
#> [166] "we"           "seek"         "to"           "use"          "dg"          
#> [171] "to"           "specify"      "f"            "θ"            "x"           
#> [176] "y"            "to"           "do"           "so"           "we"          
#> [181] "first"        "parameterize" "θxk"          "txk"          "dj"          
#> [186] "1"            "py"           "txj"          "where"        "txk"         
#> [191] "is"           "the"          "population"   "count"        "of"          
#> [196] "individuals"  "with"         "xi"           "x"            "yi"          
#> [201] "k"            "we"           "estimate"     "tx"           "tx1"         
#> [206] "txdy"         "and"          "the"          "associated"   "covariance"  
#> [211] "matrix"       "of"           "the"          "estimator"    "using"       
#> [216] "standard"     "survey"       "weighted"     "estimation"   "let"         
#> [221] "wi"           "be"           "the"          "sample"       "weight"      
#> [226] "for"          "all"          "i"            "dg"           "we"          
#> [231] "compute"      "the"          "estimated"    "total"        "and"         
#> [236] "associated"   "variance"     "for"          "each"         "x"           
#> [241] "and"          "k"            "as"           "ng"           "x"           
#> [246] "t̂xk"          "wi"           "i"            "xi"           "x"           
#> [251] "yi"           "k"            "6"            "i"            "1"           
#> [256] "ng"           "2"            "d"            "t̂xk"          "ng"          
#> [261] "t̂xk"          "x"            "var"          "wi"           "i"           
#> [266] "xi"           "x"            "yi"           "k"            "7"           
#> [271] "ng"           "1"            "i"            "1"            "ng"          
#> [276] "15"          
#> 
#> 
#> [[16]]
#> [[16]][[1]]
#>   [1] "for"          "each"         "k"            "and"          "l"           
#>   [6] "with"         "l"            "6"            "k"            "we"          
#>  [11] "also"         "compute"      "the"          "estimated"    "covariance"  
#>  [16] "ng"           "ng"           "x"            "t̂xk"          "cov"         
#>  [21] "t̂xk"          "t̂xl"          "d"            "wi"           "i"           
#>  [26] "xi"           "x"            "yi"           "k"            "ng"          
#>  [31] "1"            "i"            "1"            "ng"           "t̂xl"         
#>  [36] "wi"           "i"            "xi"           "x"            "yi"          
#>  [41] "l"            "8"            "ng"           "the"          "variance"    
#>  [46] "and"          "covariance"   "estimators"   "are"          "the"         
#>  [51] "design"       "based"        "estimators"   "for"          "probability" 
#>  [56] "proportional" "to"           "size"         "sampling"     "with"        
#>  [61] "replacement"  "as"           "is"           "typical"      "of"          
#>  [66] "multi"        "stage"        "complex"      "surveys"      "lohr"        
#>  [71] "2010"         "switching"    "now"          "to"           "a"           
#>  [76] "bayesian"     "modeling"     "perspective"  "we"           "assume"      
#>  [81] "that"         "tx"           "log"          "normal"       "µx"          
#>  [86] "τx"           "so"           "as"           "to"           "ensure"      
#>  [91] "a"            "distribution" "with"         "positive"     "values"      
#>  [96] "for"          "all"          "true"         "totals"       "we"          
#> [101] "select"       "µx"           "τx"           "so"           "that"        
#> [106] "each"         "e"            "txk"          "t̂xk"          "and"         
#> [111] "var"          "tx"           "σ̂"            "t̂x"           "the"         
#> [116] "estimated"    "covariance"   "matrix"       "with"         "elements"    
#> [121] "defined"      "by"           "7"            "and"          "8"           
#> [126] "these"        "are"          "derived"      "from"         "moment"      
#> [131] "matching"     "tarmast"      "2001"         "we"           "have"        
#> [136] "µxj"          "log"          "t̂xj"          "τx"           "j"           
#> [141] "j"            "2"            "9"            "2"            "τx"          
#> [146] "j"            "j"            "log"          "1"            "σ̂x"          
#> [151] "j"            "j"            "t̂xj"          "10"           "τx"          
#> [156] "j"            "i"            "log"          "1"            "σ̂x"          
#> [161] "j"            "i"            "t̂xj"          "t̂xi"          "11"          
#> [166] "where"        "the"          "notation"     "j"            "i"           
#> [171] "denotes"      "an"           "element"      "in"           "row"         
#> [176] "j"            "and"          "column"       "i"            "of"          
#> [181] "the"          "matrix"       "we"           "draw"         "tx"          
#> [186] "from"         "this"         "log"          "normal"       "distribution"
#> [191] "and"          "transform"    "to"           "draws"        "θx"          
#> [196] "since"        "the"          "2010"         "nscg"         "does"        
#> [201] "not"          "include"      "individuals"  "who"          "claim"       
#> [206] "in"           "the"          "acs"          "to"           "have"        
#> [211] "less"         "than"         "a"            "bachelor’s"   "degree"      
#> [216] "we"           "cannot"       "use"          "dg"           "directly"    
#> [221] "to"           "estimate"     "tx5"          "instead"      "we"          
#> [226] "estimate"     "tx"           "tx1"          "tx2"          "tx3"         
#> [231] "tx4"          "tx5"          "using"        "the"          "acs"         
#> [236] "data"         "and"          "estimate"     "tx1"          "tx2"         
#> [241] "tx3"          "tx4"          "from"         "the"          "nscg"        
#> [246] "using"        "the"          "method"       "described"    "previously"  
#> [251] "this"         "leads"        "to"           "an"           "estimate"    
#> [256] "for"          "tx5"          "more"         "precisely"    "let"         
#> [261] "the"          "acs"          "design"       "based"        "estimator"   
#> [266] "for"          "tx"           "16"          
#> 
#> 
#> [[17]]
#> [[17]][[1]]
#>   [1] "table"          "2"              "summary"        "of"            
#>   [5] "the"            "first"          "four"           "measurement"   
#>   [9] "error"          "model"          "specifications" "for"           
#>  [13] "2010"           "nscg"           "acs"            "analysis"      
#>  [17] "these"          "models"         "use"            "flat"          
#>  [21] "prior"          "distributions"  "on"             "all"           
#>  [25] "parameters"     "error"          "model"          "reporting"     
#>  [29] "model"          "expression"     "for"            "mit"           
#>  [33] "β"              "p"              "r"              "zi"            
#>  [37] "yi"             "k"              "ei"             "1"             
#>  [41] "p4"             "model"          "1"              "β1"            
#>  [45] "k"              "2"              "βk"             "i"             
#>  [49] "yi"             "k"              "categorical"    "pk"            
#>  [53] "1"              "pk"             "4"              "p4"            
#>  [57] "m"              "model"          "2"              "β1"            
#>  [61] "k"              "2"              "βk"             "i"             
#>  [65] "yi"             "k"              "xi"             "sex"           
#>  [69] "m"              "categorical"    "pk"             "1"             
#>  [73] "pk"             "4"              "p4"             "no"            
#>  [77] "model"          "3"              "β1"             "k"             
#>  [81] "2"              "βk"             "i"              "yi"            
#>  [85] "k"              "xi"             "black"          "no"            
#>  [89] "categorical"    "pk"             "1"              "pk"            
#>  [93] "4"              "p4"             "yes"            "k"             
#>  [97] "1"              "βk"             "i"              "yi"            
#> [101] "k"              "xi"             "black"          "yes"           
#> [105] "p4"             "m"              "model"          "4"             
#> [109] "β1"             "k"              "2"              "βk"            
#> [113] "i"              "yi"             "k"              "xi"            
#> [117] "sex"            "m"              "categorical"    "pm"            
#> [121] "k"              "1"              "pm"             "k"             
#> [125] "4"              "if"             "xi"             "sex"           
#> [129] "m"              "p4"             "f"              "k"             
#> [133] "1"              "βk"             "i"              "yi"            
#> [137] "k"              "xi"             "sex"            "f"             
#> [141] "categorical"    "pf"             "k"              "1"             
#> [145] "pf"             "k"              "4"              "if"            
#> [149] "xi"             "sex"            "f"              "be"            
#> [153] "t̂x"             "with"           "design"         "based"         
#> [157] "variance"       "estimate"       "σ̂"              "2"             
#> [161] "t̂x"             "we"             "sample"         "a"             
#> [165] "value"          "tx"             "normal"         "t̂x"            
#> [169] "σ̂"              "2"              "t̂x"             "using"         
#> [173] "an"             "independent"    "sample"         "of"            
#> [177] "values"         "of"             "tx1"            "tx4"           
#> [181] "from"           "4j"             "1"              "txj"           
#> [185] "and"            "set"            "tx"             "tx1"           
#> [189] "p"              "the"            "nscg"           "we"            
#> [193] "compute"        "tx5"            "tx"             "tx5"           
#> [197] "we"             "repeat"         "these"          "steps"         
#> [201] "10,000"         "times"          "we"             "then"          
#> [205] "compute"        "the"            "mean"           "and"           
#> [209] "covariance"     "matrix"         "of"             "the"           
#> [213] "10,000"         "draws"          "which"          "we"            
#> [217] "again"          "plug"           "into"           "9"             
#> [221] "11"             "the"            "resulting"      "log"           
#> [225] "normal"         "distri"         "bution"         "is"            
#> [229] "the"            "approximate"    "posterior"      "distribution"  
#> [233] "of"             "θx"             "we"             "include"       
#> [237] "an"             "example"        "of"             "this"          
#> [241] "entire"         "procedure"      "in"             "the"           
#> [245] "supplementary"  "material"       "4.2"            "measurement"   
#> [249] "error"          "models"         "the"            "two"           
#> [253] "sets"           "of"             "measurement"    "error"         
#> [257] "models"         "include"        "four"           "that"          
#> [261] "use"            "flat"           "prior"          "distributions" 
#> [265] "and"            "three"          "that"           "use"           
#> [269] "informative"    "prior"          "distributions"  "based"         
#> [273] "on"             "the"            "1993"           "linked"        
#> [277] "data"           "for"            "all"            "error"         
#> [281] "models"         "we"             "use"            "a"             
#> [285] "logistic"       "regression"     "of"             "ei"            
#> [289] "on"             "various"        "main"           "effects"       
#> [293] "and"            "interactions"   "of"             "yi"            
#> [297] "and"            "xi"             "for"            "all"           
#> [301] "reporting"      "models"         "we"             "use"           
#> [305] "categorical"    "distributions"  "with"           "probabilities" 
#> [309] "that"           "depend"         "on"             "yi"            
#> [313] "and"            "possibly"       "xi"             "the"           
#> [317] "four"           "models"         "with"           "flat"          
#> [321] "prior"          "distributions"  "are"            "summarized"    
#> [325] "in"             "table"          "2"              "in"            
#> [329] "model"          "1"              "the"            "error"         
#> [333] "and"            "reporting"      "models"         "depend"        
#> [337] "only"           "on"             "17"            
#> 
#> 
#> [[18]]
#> [[18]][[1]]
#>   [1] "table"          "3"              "summary"        "of"            
#>   [5] "informative"    "prior"          "specifications" "for"           
#>   [9] "2010"           "nscg"           "acs"            "analysis"      
#>  [13] "for"            "males"          "with"           "bachelor’s"    
#>  [17] "degrees"        "error"          "rate"           "reporting"     
#>  [21] "probabilities"  "pm"             "1"              "2"             
#>  [25] "pm"             "1"              "3"              "pm"            
#>  [29] "1"              "4"              "model"          "4"             
#>  [33] "beta"           "1"              "1"              "dirichlet"     
#>  [37] "1"              "1"              "1"              "model"         
#>  [41] "5"              "beta"           "76"             "14.24"         
#>  [45] "dirichlet"      "3.54"           "1.27"           "0.19"          
#>  [49] "model"          "6"              "beta"           "2724.2"        
#>  [53] "50862"          "dirichlet"      "2235.3"         "799.7"         
#>  [57] "123.1"          "model"          "7"              "beta"          
#>  [61] "500"            "99500"          "dirichlet"      "1"             
#>  [65] "1"              "1"              "yi"             "model"         
#>  [69] "2"              "and"            "3"              "keep"          
#>  [73] "the"            "reporting"      "model"          "as"            
#>  [77] "in"             "4"              "but"            "expand"        
#>  [81] "the"            "error"          "model"          "in"            
#>  [85] "model"          "2"              "the"            "probability"   
#>  [89] "of"             "a"              "reporting"      "error"         
#>  [93] "can"            "vary"           "with"           "yi"            
#>  [97] "and"            "sex"            "xi"             "sex"           
#> [101] "in"             "model"          "3"              "error"         
#> [105] "probabilities"  "can"            "vary"           "with"          
#> [109] "yi"             "and"            "the"            "indicator"     
#> [113] "for"            "black"          "race"           "xi"            
#> [117] "black"          "in"             "model"          "4"             
#> [121] "the"            "error"          "and"            "reporting"     
#> [125] "models"         "both"           "depend"         "on"            
#> [129] "y"              "and"            "sex"            "for"           
#> [133] "models"         "5"              "7"              "we"            
#> [137] "use"            "the"            "specification"  "in"            
#> [141] "model"          "4"              "and"            "incorporate"   
#> [145] "prior"          "in"             "formation"      "about"         
#> [149] "the"            "measurement"    "errors"         "from"          
#> [153] "the"            "1993"           "linked"         "data"          
#> [157] "in"             "constructing"   "the"            "priors"        
#> [161] "we"             "first"          "remove"         "records"       
#> [165] "that"           "have"           "been"           "flagged"       
#> [169] "as"             "having"         "missing"        "education"     
#> [173] "that"           "has"            "been"           "imputed"       
#> [177] "because"        "these"          "imputations"    "might"         
#> [181] "not"            "closely"        "reflect"        "the"           
#> [185] "actual"         "education"      "values"         "black"         
#> [189] "et"             "al"             "2003"           "table"         
#> [193] "3"              "displays"       "the"            "prior"         
#> [197] "distributions"  "for"            "males"          "with"          
#> [201] "bachelor’s"     "degrees"        "details"        "on"            
#> [205] "how"            "we"             "arrive"         "at"            
#> [209] "these"          "and"            "other"          "groups"        
#> [213] "prior"          "specifications" "are"            "in"            
#> [217] "the"            "supplementary"  "material"       "here"          
#> [221] "we"             "summarize"      "briefly"        "x"             
#> [225] "for"            "model"          "5"              "we"            
#> [229] "set"            "the"            "prior"          "distributions" 
#> [233] "for"            "each"           "βk"             "so"            
#> [237] "that"           "the"            "error"          "rates"         
#> [241] "are"            "centered"       "at"             "the"           
#> [245] "estimate"       "from"           "the"            "1993"          
#> [249] "linked"         "data"           "we"             "also"          
#> [253] "require"        "the"            "central"        "95"            
#> [257] "probability"    "interval"       "of"             "the"           
#> [261] "prior"          "distribution"   "on"             "each"          
#> [265] "error"          "rate"           "to"             "be"            
#> [269] "close"          "to"             "005"            "20"            
#> [273] "allowing"       "for"            "a"              "wide"          
#> [277] "but"            "not"            "unrealistic"    "range"         
#> [281] "of"             "possible"       "error"          "rates"         
#> [285] "for"            "the"            "reporting"      "probabilities" 
#> [289] "pm"             "k"              "z"              "and"           
#> [293] "pf"             "k"              "z"              "we"            
#> [297] "center"         "most"           "of"             "the"           
#> [301] "prior"          "distributions"  "at"             "the"           
#> [305] "corresponding"  "estimates"      "from"           "the"           
#> [309] "1993"           "linked"         "data"           "we"            
#> [313] "require"        "the"            "central"        "95"            
#> [317] "probability"    "interval"       "of"             "each"          
#> [321] "prior"          "distribution"   "to"             "have"          
#> [325] "support"        "on"             "values"         "of"            
#> [329] "p"              "k"              "z"              "within"        
#> [333] "10"             "of"             "the"            "1993"          
#> [337] "point"          "estimate"       "truncating"     "at"            
#> [341] "zero"           "or"             "one"            "as"            
#> [345] "needed"         "one"            "exception"      "is"            
#> [349] "18"            
#> 
#> 
#> [[19]]
#> [[19]][[1]]
#>   [1] "the"           "reporting"     "probabilities" "for"          
#>   [5] "those"         "with"          "no"            "college"      
#>   [9] "degree"        "who"           "report"        "professional" 
#>  [13] "degree"        "which"         "we"            "center"       
#>  [17] "at"            "half"          "the"           "1993"         
#>  [21] "estimate"      "the"           "census"        "bureau"       
#>  [25] "has"           "improved"      "the"           "clarity"      
#>  [29] "of"            "the"           "definition"    "of"           
#>  [33] "professional"  "in"            "the"           "20"           
#>  [37] "years"         "since"         "the"           "1990"         
#>  [41] "long"          "form"          "as"            "discussed"    
#>  [45] "in"            "the"           "prior"         "specification"
#>  [49] "section"       "of"            "the"           "supplementary"
#>  [53] "material"      "for"           "model"         "6"            
#>  [57] "we"            "use"           "the"           "same"         
#>  [61] "prior"         "means"         "as"            "in"           
#>  [65] "model"         "5"             "for"           "both"         
#>  [69] "error"         "and"           "re"            "porting"      
#>  [73] "models"        "however"       "we"            "substantially"
#>  [77] "tighten"       "the"           "prior"         "distributions"
#>  [81] "to"            "make"          "the"           "prior"        
#>  [85] "variance"      "accord"        "with"          "the"          
#>  [89] "uncertainty"   "in"            "the"           "point"        
#>  [93] "estimates"     "from"          "the"           "1993"         
#>  [97] "linked"        "data"          "we"            "do"           
#> [101] "so"            "by"            "using"         "prior"        
#> [105] "sample"        "sizes"         "that"          "match"        
#> [109] "those"         "from"          "the"           "1993"         
#> [113] "nscg"          "for"           "example"       "the"          
#> [117] "1993"          "nscg"          "included"      "53,586"       
#> [121] "males"         "with"          "bachelor’s"    "degrees"      
#> [125] "excluding"     "those"         "records"       "who"          
#> [129] "had"           "their"         "census"        "education"    
#> [133] "imputed"       "we"            "therefore"     "use"          
#> [137] "beta"          "2724.2"        "50862"         "as"           
#> [141] "the"           "prior"         "distribution"  "for"          
#> [145] "the"           "error"         "rate"          "for"          
#> [149] "this"          "x"             "we"            "similarly"    
#> [153] "increase"      "the"           "prior"         "sample"       
#> [157] "sizes"         "for"           "the"           "reporting"    
#> [161] "probabilities" "to"            "match"         "the"          
#> [165] "1993"          "nscg"          "sample"        "sizes"        
#> [169] "model"         "7"             "departs"       "from"         
#> [173] "the"           "1993"          "linked"        "data"         
#> [177] "estimates"     "and"           "encodes"       "a"            
#> [181] "strong"        "prior"         "belief"        "that"         
#> [185] "almost"        "no"            "one"           "misreports"   
#> [189] "their"         "education"     "except"        "for"          
#> [193] "haphazard"     "mistakes"      "here"          "we"           
#> [197] "set"           "the"           "prior"         "mean"         
#> [201] "for"           "the"           "probability"   "of"           
#> [205] "misreporting"  "education"     "to"            "005"          
#> [209] "for"           "all"           "demographic"   "groups"       
#> [213] "we"            "use"           "a"             "prior"        
#> [217] "sample"        "size"          "of"            "100,000"      
#> [221] "making"        "the"           "prior"         "distribution" 
#> [225] "concentrate"   "strongly"      "around"        "005"          
#> [229] "for"           "the"           "reporting"     "probabilities"
#> [233] "we"            "use"           "a"             "non"          
#> [237] "informative"   "prior"         "distribution"  "for"          
#> [241] "convenience"   "since"         "the"           "estimates"    
#> [245] "of"            "the"           "reporting"     "probabilities"
#> [249] "are"           "strongly"      "influenced"    "by"           
#> [253] "the"           "concentrated"  "prior"         "distributions"
#> [257] "on"            "the"           "error"         "rates"        
#> [261] "finally"       "for"           "comparison"    "purposes"     
#> [265] "we"            "also"          "fit"           "the"          
#> [269] "model"         "based"         "on"            "a"            
#> [273] "conditional"   "independence"  "assumption"    "cia"          
#> [277] "to"            "impute"        "yi"            "for"          
#> [281] "individuals"   "in"            "the"           "acs"          
#> [285] "under"         "the"           "cia"           "we"           
#> [289] "sample"        "θ"             "and"           "then"         
#> [293] "impute"        "y"             "θ"             "x"            
#> [297] "from"          "the"           "true"          "data"         
#> [301] "model"         "here"          "we"            "do"           
#> [305] "not"           "use"           "the"           "reported"     
#> [309] "value"         "of"            "zi"            "in"           
#> [313] "the"           "imputations"   "19"           
#> 
#> 
#> [[20]]
#> [[20]][[1]]
#>   [1] "4.3"           "empirical"     "results"       "we"           
#>   [5] "first"         "examine"       "what"          "each"         
#>   [9] "model"         "suggests"      "about"         "the"          
#>  [13] "extent"        "and"           "nature"        "of"           
#>  [17] "the"           "mea"           "surement"      "errors"       
#>  [21] "in"            "the"           "2010"          "acs"          
#>  [25] "we"            "then"          "use"           "the"          
#>  [29] "models"        "to"            "assess"        "sensitivity"  
#>  [33] "of"            "results"       "about"         "the"          
#>  [37] "substantive"   "questions"     "related"       "to"           
#>  [41] "number"        "of"            "degrees"       "and"          
#>  [45] "income"        "4.3.1"         "distributions" "of"           
#>  [49] "errors"        "in"            "reported"      "acs"          
#>  [53] "education"     "values"        "table"         "4"            
#>  [57] "displays"      "the"           "multiple"      "imputation"   
#>  [61] "point"         "estimates"     "and"           "95"           
#>  [65] "confidence"    "intervals"     "for"           "the"          
#>  [69] "proportions"   "of"            "errors"        "by"           
#>  [73] "gender"        "and"           "nscg"          "education"    
#>  [77] "obtained"      "from"          "the"           "m"            
#>  [81] "50"            "draws"         "of"            "ei"           
#>  [85] "for"           "all"           "individuals"   "in"           
#>  [89] "de"            "we"            "begin"         "by"           
#>  [93] "comparing"     "results"       "for"           "the"          
#>  [97] "set"           "of"            "models"        "with"         
#> [101] "flat"          "prior"         "distributions" "models"       
#> [105] "1"             "4"             "and"           "the"          
#> [109] "cia"           "model"         "then"          "move"         
#> [113] "to"            "the"           "set"           "of"           
#> [117] "models"        "with"          "informative"   "prior"        
#> [121] "distributions" "models"        "5"             "7"            
#> [125] "the"           "cia"           "model"         "suggests"     
#> [129] "extremely"     "large"         "error"         "percentages"  
#> [133] "especially"    "for"           "the"           "highest"      
#> [137] "education"     "levels"        "these"         "rates"        
#> [141] "seem"          "unlikely"      "to"            "be"           
#> [145] "reality"       "leading"       "us"            "to"           
#> [149] "reject"        "the"           "cia"           "model"        
#> [153] "the"           "overall"       "error"         "rates"        
#> [157] "for"           "models"        "1"             "4"            
#> [161] "are"           "similar"       "and"           "more"         
#> [165] "realistic"     "than"          "those"         "from"         
#> [169] "the"           "cia"           "model"         "the"          
#> [173] "differences"   "in"            "error"         "estimates"    
#> [177] "between"       "model"         "2"             "and"          
#> [181] "model"         "1"             "suggest"       "that"         
#> [185] "the"           "probability"   "of"            "error"        
#> [189] "depends"       "on"            "sex"           "comparing"    
#> [193] "results"       "for"           "model"         "3"            
#> [197] "and"           "model"         "1"             "however"      
#> [201] "we"            "see"           "little"        "evidence"     
#> [205] "of"            "important"     "race"          "effects"      
#> [209] "on"            "the"           "propensity"    "to"           
#> [213] "make"          "errors"        "model"         "4"            
#> [217] "generalizes"   "model"         "2"             "by"           
#> [221] "allowing"      "the"           "reporting"     "probabilities"
#> [225] "to"            "vary"          "by"            "sex"          
#> [229] "if"            "these"         "probabilities" "were"         
#> [233] "similar"       "across"        "sex"           "in"           
#> [237] "reality"       "we"            "would"         "expect"       
#> [241] "the"           "two"           "models"        "to"           
#> [245] "produce"       "similar"       "results"       "however"      
#> [249] "the"           "estimated"     "error"         "rates"        
#> [253] "are"           "fairly"        "different"     "for"          
#> [257] "example"       "the"           "estimated"     "proportion"   
#> [261] "of"            "errors"        "for"           "female"       
#> [265] "professionals" "from"          "model"         "4"            
#> [269] "is"            "about"         "double"        "that"         
#> [273] "from"          "model"         "2"             "to"           
#> [277] "determine"     "where"         "the"           "models"       
#> [281] "differ"        "most"          "we"            "examine"      
#> [285] "the"           "estimated"     "reporting"     "probabilities"
#> [289] "displayed"     "in"            "table"         "5"            
#> [293] "model"         "4"             "estimates"     "some"         
#> [297] "significant"   "differences"   "in"            "reporting"    
#> [301] "probabilities" "by"            "gender"        "for"          
#> [305] "example"       "20"           
#> 
#> 
#> [[21]]
#> [[21]][[1]]
#>   [1] "males"          "with"           "bachelor’s"     "degrees"       
#>   [5] "who"            "make"           "a"              "reporting"     
#>   [9] "error"          "are"            "estimated"      "to"            
#>  [13] "report"         "a"              "master’s"       "degree"        
#>  [17] "with"           "probability"    "96"             "whereas"       
#>  [21] "females"        "with"           "bachelor’s"     "degrees"       
#>  [25] "who"            "make"           "a"              "reporting"     
#>  [29] "error"          "are"            "estimated"      "to"            
#>  [33] "report"         "a"              "master’s"       "degree"        
#>  [37] "with"           "probability"    "67"             "and"           
#>  [41] "a"              "professional"   "degree"         "with"          
#>  [45] "probability"    "30"             "other"          "large"         
#>  [49] "differences"    "exist"          "for"            "professional"  
#>  [53] "degree"         "holders"        "females"        "with"          
#>  [57] "professional"   "degrees"        "who"            "make"          
#>  [61] "a"              "reporting"      "error"          "are"           
#>  [65] "most"           "likely"         "to"             "report"        
#>  [69] "a"              "bachelor’s"     "degree"         "whereas"       
#>  [73] "men"            "with"           "professional"   "degrees"       
#>  [77] "who"            "make"           "a"              "reporting"     
#>  [81] "error"          "are"            "most"           "likely"        
#>  [85] "to"             "report"         "a"              "master’s"      
#>  [89] "degree"         "or"             "ph"             "d"             
#>  [93] "we"             "note"           "that"           "some"          
#>  [97] "of"             "the"            "estimates"      "for"           
#> [101] "model"          "4"              "are"            "based"         
#> [105] "on"             "small"          "sample"         "sizes"         
#> [109] "which"          "explains"       "the"            "wide"          
#> [113] "standard"       "errors"         "turning"        "to"            
#> [117] "models"         "5"              "7"              "we"            
#> [121] "can"            "see"            "the"            "impact"        
#> [125] "of"             "the"            "informative"    "prior"         
#> [129] "distributions"  "by"             "comparing"      "results"       
#> [133] "in"             "table"          "4"              "under"         
#> [137] "these"          "models"         "to"             "those"         
#> [141] "for"            "model"          "4"              "moving"        
#> [145] "from"           "model"          "4"              "to"            
#> [149] "model"          "5"              "the"            "most"          
#> [153] "noticeable"     "differences"    "are"            "for"           
#> [157] "women"          "with"           "a"              "ph"            
#> [161] "d"              "and"            "men"            "with"          
#> [165] "a"              "master’s"       "degree"         "for"           
#> [169] "whom"           "model"          "5"              "suggests"      
#> [173] "lower"          "error"          "rates"          "these"         
#> [177] "groups"         "have"           "smaller"        "sample"        
#> [181] "sizes"          "so"             "that"           "the"           
#> [185] "data"           "do"             "not"            "swamp"         
#> [189] "the"            "effects"        "of"             "the"           
#> [193] "prior"          "distribution"   "when"           "making"        
#> [197] "the"            "prior"          "sample"         "sizes"         
#> [201] "very"           "large"          "as"             "in"            
#> [205] "models"         "6"              "and"            "7"             
#> [209] "the"            "information"    "in"             "the"           
#> [213] "prior"          "distribution"   "tends"          "to"            
#> [217] "overwhelm"      "the"            "information"    "in"            
#> [221] "the"            "data"           "we"             "provide"       
#> [225] "more"           "thorough"       "investigation"  "of"            
#> [229] "the"            "impact"         "of"             "the"           
#> [233] "prior"          "specifications" "in"             "the"           
#> [237] "supplementary"  "material"       "of"             "course"        
#> [241] "we"             "cannot"         "be"             "certain"       
#> [245] "which"          "model"          "most"           "closely"       
#> [249] "reflects"       "the"            "true"           "measure"       
#> [253] "ment"           "error"          "mechanism"      "the"           
#> [257] "best"           "we"             "can"            "do"            
#> [261] "is"             "perform"        "diagnostic"     "tests"         
#> [265] "to"             "see"            "which"          "models"        
#> [269] "if"             "any"            "should"         "be"            
#> [273] "discounted"     "as"             "not"            "adequately"    
#> [277] "describing"     "the"            "observed"       "data"          
#> [281] "m"              "for"            "each"           "acs"           
#> [285] "imputed"        "dataset"        "de"             "under"         
#> [289] "each"           "model"          "we"             "compute"       
#> [293] "the"            "sample"         "pro"            "m"             
#> [297] "portions"       "π̂xk"            "and"            "corresponding" 
#> [301] "multiple"       "imputation"     "95"             "confidence"    
#> [305] "intervals"      "for"            "all"            "165̇"           
#> [309] "unique"         "values"         "of"             "x"             
#> [313] "y"              "we"             "determine"      "how"           
#> [317] "many"           "of"             "the"            "80"            
#> [321] "estimated"      "population"     "percentages"    "of"            
#> [325] "y"              "x"              "computed"       "from"          
#> [329] "the"            "2010"           "nscg"           "using"         
#> [333] "the"            "estimated"      "t̂x"             "from"          
#> [337] "the"            "acs"            "to"             "back"          
#> [341] "into"           "an"             "estimate"       "of"            
#> [345] "t̂x5"            "fall"           "within"         "the"           
#> [349] "multiple"       "imputation"     "95"             "21"            
#> 
#> 
#> [[22]]
#> [[22]][[1]]
#>   [1] "confidence"     "intervals"      "models"         "that"          
#>   [5] "yield"          "low"            "rates"          "do"            
#>   [9] "not"            "describe"       "the"            "data"          
#>  [13] "accurately"     "for"            "model"          "1"             
#>  [17] "73"             "of"             "80"             "nscg"          
#>  [21] "population"     "share"          "estimates"      "are"           
#>  [25] "contained"      "in"             "the"            "acs"           
#>  [29] "multiple"       "imputation"     "intervals"      "corresponding" 
#>  [33] "counts"         "are"            "75"             "for"           
#>  [37] "model"          "2"              "71"             "for"           
#>  [41] "model"          "3"              "and"            "76"            
#>  [45] "for"            "model"          "4"              "these"         
#>  [49] "results"        "suggest"        "that"           "model"         
#>  [53] "1"              "and"            "model"          "3"             
#>  [57] "may"            "be"             "inferior"       "to"            
#>  [61] "model"          "2"              "and"            "model"         
#>  [65] "4"              "for"            "the"            "models"        
#>  [69] "with"           "informative"    "prior"          "distributions" 
#>  [73] "the"            "counts"         "are"            "74"            
#>  [77] "for"            "model"          "5"              "67"            
#>  [81] "for"            "model"          "6"              "and"           
#>  [85] "54"             "for"            "model"          "7"             
#>  [89] "although"       "the"            "prior"          "beliefs"       
#>  [93] "in"             "models"         "6"              "and"           
#>  [97] "7"              "seem"           "plausible"      "at"            
#> [101] "first"          "glance"         "the"            "diagnostic"    
#> [105] "suggests"       "that"           "they"           "do"            
#> [109] "not"            "describe"       "the"            "2010"          
#> [113] "data"           "distributions"  "as"             "well"          
#> [117] "as"             "models"         "4"              "and"           
#> [121] "5"              "considering"    "the"            "results"       
#> [125] "as"             "well"           "as"             "the"           
#> [129] "diagnostic"     "check"          "if"             "we"            
#> [133] "had"            "to"             "choose"         "one"           
#> [137] "model"          "we"             "would"          "select"        
#> [141] "model"          "5"              "it"             "seems"         
#> [145] "plausible"      "that"           "the"            "probability"   
#> [149] "of"             "misreporting"   "education"      "as"            
#> [153] "well"           "as"             "the"            "reported"      
#> [157] "value"          "itself"         "when"           "errors"        
#> [161] "are"            "made"           "depend"         "on"            
#> [165] "both"           "sex"            "and"            "true"          
#> [169] "education"      "level"          "additionally"   "the"           
#> [173] "prior"          "distribution"   "from"           "the"           
#> [177] "1993"           "linked"         "data"           "pulls"         
#> [181] "estimates"      "in"             "groups"         "with"          
#> [185] "little"         "sample"         "size"           "to"            
#> [189] "measurement"    "error"          "distributions"  "that"          
#> [193] "seem"           "more"           "plausible"      "on"            
#> [197] "face"           "value"          "however"        "one"           
#> [201] "need"           "not"            "use"            "the"           
#> [205] "data"           "fusion"         "framework"      "for"           
#> [209] "measurement"    "error"          "to"             "select"        
#> [213] "a"              "single"         "model"          "rather"        
#> [217] "one"            "can"            "use"            "the"           
#> [221] "framework"      "to"             "examine"        "sensitivity"   
#> [225] "of"             "analyses"       "to"             "the"           
#> [229] "different"      "specifications" "4.3.2"          "sensitivity"   
#> [233] "analyses"       "figure"         "2"              "displays"      
#> [237] "the"            "multiply"       "imputed"        "survey"        
#> [241] "weighted"       "inferences"     "for"            "the"           
#> [245] "total"          "number"         "of"             "women"         
#> [249] "with"           "science"        "and"            "engineering"   
#> [253] "degrees"        "computing"      "using"          "the"           
#> [257] "acs"            "specific"       "indicator"      "variable"      
#> [261] "we"             "show"           "results"        "for"           
#> [265] "models"         "4"              "7"              "the"           
#> [269] "cia"            "model"          "and"            "based"         
#> [273] "on"             "the"            "acs"            "data"          
#> [277] "without"        "any"            "adjustment"     "for"           
#> [281] "misreporting"   "education"      "the"            "confidence"    
#> [285] "intervals"      "for"            "model"          "4"             
#> [289] "and"            "model"          "5"              "overlap"       
#> [293] "substantially"  "suggesting"     "not"            "much"          
#> [297] "practical"      "difference"     "in"             "choosing"      
#> [301] "among"          "these"          "models"         "however"       
#> [305] "both"           "are"            "noticeably"     "different"     
#> [309] "from"           "the"            "other"          "models"        
#> [313] "especially"     "for"            "the"            "ph"            
#> [317] "d"              "and"            "professional"   "degrees"       
#> [321] "as"             "the"            "prior"          "distributions" 
#> [325] "on"             "the"            "error"          "rates"         
#> [329] "get"            "stronger"       "the"            "estimated"     
#> [333] "counts"         "increase"       "towards"        "22"            
#> 
#> 
#> [[23]]
#> [[23]][[1]]
#>   [1] "6"             "x"             "10"            "bachelors"    
#>   [5] "degree"        "6"             "x"             "10"           
#>   [9] "masters"       "degree"        "5.2"           "2.6"          
#>  [13] "acs"           "cia"           "model"         "model"        
#>  [17] "4"             "5"             "model"         "5"            
#>  [21] "2.5"           "estimated"     "total"         "no"           
#>  [25] "of"            "sci"           "and"           "eng"          
#>  [29] "degrees"       "estimated"     "total"         "no"           
#>  [33] "of"            "sci"           "and"           "eng"          
#>  [37] "degrees"       "model"         "6"             "model"        
#>  [41] "7"             "4.8"           "2.4"           "awarded"      
#>  [45] "to"            "women"         "awarded"       "to"           
#>  [49] "women"         "4.6"           "2.3"           "4.4"          
#>  [53] "2.2"           "4.2"           "2.1"           "4"            
#>  [57] "2"             "acs"           "cia"           "m4"           
#>  [61] "m5"            "m6"            "m7"            "acs"          
#>  [65] "cia"           "m4"            "m5"            "m6"           
#>  [69] "m7"            "model"         "model"         "5"            
#>  [73] "x"             "10"            "professional"  "degree"       
#>  [77] "x"             "10"            "5"             "phd"          
#>  [81] "degree"        "7.5"           "5"             "7"            
#>  [85] "4.5"           "estimated"     "total"         "no"           
#>  [89] "of"            "sci"           "and"           "eng"          
#>  [93] "degrees"       "6.5"           "estimated"     "total"        
#>  [97] "no"            "of"            "sci"           "and"          
#> [101] "eng"           "degrees"       "4"             "6"            
#> [105] "awarded"       "to"            "women"         "awarded"      
#> [109] "to"            "women"         "5.5"           "3.5"          
#> [113] "5"             "3"             "4.5"           "2.5"          
#> [117] "4"             "2"             "3.5"           "3"            
#> [121] "1.5"           "acs"           "cia"           "m4"           
#> [125] "m5"            "m6"            "m7"            "acs"          
#> [129] "cia"           "m4"            "m5"            "m6"           
#> [133] "m7"            "model"         "model"         "figure"       
#> [137] "2"             "the"           "estimated"     "total"        
#> [141] "number"        "of"            "science"       "and"          
#> [145] "engineering"   "degrees"       "awarded"       "to"           
#> [149] "women"         "under"         "each"          "model"        
#> [153] "we"            "plot"          "the"           "mean"         
#> [157] "and"           "95"            "confidence"    "intervals"    
#> [161] "note"          "the"           "difference"    "in"           
#> [165] "scale"         "for"           "each"          "degree"       
#> [169] "category"      "the"           "estimate"      "using"        
#> [173] "the"           "acs"           "reported"      "education"    
#> [177] "we"            "note"          "that"          "using"        
#> [181] "the"           "acs"           "reported"      "education"    
#> [185] "without"       "adjustments"   "results"       "in"           
#> [189] "substantially" "higher"        "estimated"     "totals"       
#> [193] "at"            "the"           "professional"  "and"          
#> [197] "ph"            "d"             "levels"        "than"         
#> [201] "any"           "of"            "the"           "models"       
#> [205] "that"          "account"       "for"           "measurement"  
#> [209] "error"         "we"            "also"          "note"         
#> [213] "that"          "the"           "cia"           "model"        
#> [217] "yields"        "considerably"  "lower"         "counts"       
#> [221] "for"           "all"           "but"           "bachelor’s"   
#> [225] "degrees"       "figure"        "3"             "displays"     
#> [229] "inferences"    "for"           "the"           "average"      
#> [233] "income"        "for"           "different"     "degrees"      
#> [237] "for"           "most"          "degrees"       "the"          
#> [241] "point"         "estimates"     "for"           "models"       
#> [245] "4"             "7"             "are"           "reasonably"   
#> [249] "close"         "with"          "models"        "4"            
#> [253] "23"           
#> 
#> 
#> [[24]]
#> [[24]][[1]]
#>   [1] "and"            "5"              "again"          "giving"        
#>   [5] "similar"        "results"        "the"            "estimated"     
#>   [9] "average"        "income"         "for"            "professionals" 
#>  [13] "differs"        "noticeably"     "across"         "models"        
#>  [17] "with"           "model"          "4"              "and"           
#>  [21] "model"          "5"              "suggesting"     "lower"         
#>  [25] "averages"       "than"           "the"            "unadjusted"    
#>  [29] "acs"            "estimates"      "or"             "than"          
#>  [33] "models"         "6"              "and"            "7"             
#>  [37] "we"             "note"           "that"           "the"           
#>  [41] "cia"            "model"          "estimates"      "are"           
#>  [45] "clearly"        "implausible"    "as"             "an"            
#>  [49] "independent"    "check"          "on"             "these"         
#>  [53] "estimates"      "we"             "considered"     "the"           
#>  [57] "estimated"      "average"        "earnings"       "in"            
#>  [61] "the"            "2010"           "current"        "population"    
#>  [65] "survey"         "they"           "are"            "83,720"        
#>  [69] "for"            "professional"   "80,600"         "for"           
#>  [73] "ph"             "d"              "degree"         "66,144"        
#>  [77] "for"            "master’s"       "degree"         "and"           
#>  [81] "53,976"         "for"            "bachelor’s"     "degree"        
#>  [85] "http"           "www"            "collegequest"   "com"           
#>  [89] "bls"            "research"       "education"      "pays"          
#>  [93] "2010"           "aspx"           "these"          "line"          
#>  [97] "up"             "more"           "closely"        "with"          
#> [101] "the"            "estimates"      "from"           "model"         
#> [105] "5"              "than"           "any"            "other"         
#> [109] "model"          "especially"     "for"            "the"           
#> [113] "professional"   "degree"         "category"       "where"         
#> [117] "the"            "estimates"      "most"           "differ"        
#> [121] "figure"         "4"              "displays"       "inferences"    
#> [125] "for"            "the"            "average"        "income"        
#> [129] "for"            "men"            "and"            "women"         
#> [133] "all"            "models"         "support"        "the"           
#> [137] "conclusion"     "that"           "men"            "make"          
#> [141] "more"           "than"           "women"          "apparently"    
#> [145] "misreporting"   "in"             "education"      "does"          
#> [149] "not"            "account"        "for"            "that"          
#> [153] "gap"            "at"             "least"          "for"           
#> [157] "the"            "models"         "considered"     "here"          
#> [161] "we"             "note"           "that"           "model"         
#> [165] "4"              "suggests"       "potentially"    "larger"        
#> [169] "income"         "gaps"           "between"        "male"          
#> [173] "and"            "female"         "ph"             "d"             
#> [177] "recipients"     "than"           "the"            "other"         
#> [181] "models"         "5"              "concluding"     "remarks"       
#> [185] "the"            "framework"      "presented"      "in"            
#> [189] "this"           "article"        "offers"         "analysts"      
#> [193] "tools"          "for"            "using"          "the"           
#> [197] "information"    "in"             "a"              "high"          
#> [201] "quality"        "separate"       "data"           "source"        
#> [205] "to"             "adjust"         "for"            "measurement"   
#> [209] "errors"         "in"             "the"            "database"      
#> [213] "of"             "interest"       "key"            "to"            
#> [217] "the"            "framework"      "is"             "to"            
#> [221] "replace"        "conditional"    "independence"   "assumptions"   
#> [225] "typically"      "used"           "in"             "data"          
#> [229] "fusion"         "with"           "carefully"      "considered"    
#> [233] "measurement"    "error"          "models"         "this"          
#> [237] "avoids"         "sacrificing"    "information"    "and"           
#> [241] "facilitates"    "analysis"       "of"             "the"           
#> [245] "sensitivity"    "of"             "conclusions"    "to"            
#> [249] "alternative"    "measurement"    "error"          "specifications"
#> [253] "analysts"       "can"            "use"            "diagnostic"    
#> [257] "tests"          "to"             "rule"           "out"           
#> [261] "some"           "measurement"    "error"          "models"        
#> [265] "and"            "perform"        "sensibility"    "tests"         
#> [269] "on"             "others"         "to"             "identify"      
#> [273] "reasonable"     "candidates"     "24"            
#> 
#> 
#> [[25]]
#> [[25]][[1]]
#>   [1] "4"             "x"             "10"            "10"           
#>   [5] "acs"           "cia"           "model"         "model"        
#>   [9] "4"             "9"             "model"         "5"            
#>  [13] "model"         "6"             "model"         "7"            
#>  [17] "8"             "estimated"     "average"       "income"       
#>  [21] "7"             "6"             "5"             "4"            
#>  [25] "3"             "ba"            "ma"            "prof"         
#>  [29] "phd"           "none"          "education"     "level"        
#>  [33] "figure"        "3"             "multiple"      "imputation"   
#>  [37] "point"         "and"           "95"            "confidence"   
#>  [41] "interval"      "estimates"     "for"           "the"          
#>  [45] "average"       "income"        "within"        "each"         
#>  [49] "education"     "level"         "the"           "acs"          
#>  [53] "estimate"      "is"            "the"           "survey"       
#>  [57] "weighted"      "estimate"      "based"         "on"           
#>  [61] "the"           "reported"      "education"     "level"        
#>  [65] "in"            "the"           "2010"          "acs"          
#>  [69] "besides"       "survey"        "sampling"      "contexts"     
#>  [73] "like"          "the"           "one"           "considered"   
#>  [77] "here"          "involving"     "the"           "acs"          
#>  [81] "and"           "nscg"          "the"           "framework"    
#>  [85] "offers"        "potential"     "approaches"    "for"          
#>  [89] "dealing"       "with"          "possible"      "mea"          
#>  [93] "surement"      "errors"        "in"            "organic"      
#>  [97] "big"           "data"          "this"          "is"           
#> [101] "increasingly"  "important"     "as"            "data"         
#> [105] "stewards"      "and"           "analysts"      "consider"     
#> [109] "replacing"     "or"            "supplementing" "high"         
#> [113] "quality"       "but"           "expensive"     "surveys"      
#> [117] "with"          "inexpensive"   "and"           "large"        
#> [121] "sample"        "organic"       "data"          "often"        
#> [125] "scant"         "attention"     "is"            "paid"         
#> [129] "to"            "the"           "potential"     "impact"       
#> [133] "of"            "measurement"   "errors"        "on"           
#> [137] "inferences"    "from"          "those"         "data"         
#> [141] "the"           "framework"     "could"         "be"           
#> [145] "used"          "with"          "high"          "quality"      
#> [149] "validated"     "surveys"       "as"            "the"          
#> [153] "gold"          "standard"      "data"          "allowing"     
#> [157] "for"           "adjustments"   "to"            "the"          
#> [161] "error"         "prone"         "organic"       "data"         
#> [165] "25"           
#> 
#> 
#> [[26]]
#> [[26]][[1]]
#>   [1] "4"                "x"                "10"              
#>   [4] "bachelors"        "degree"           "4"               
#>   [7] "x"                "10"               "masters"         
#>  [10] "degree"           "7"                "8"               
#>  [13] "male"             "small"            "marker"          
#>  [16] "female"           "large"            "marker"          
#>  [19] "6.5"              "7.5"              "7"               
#>  [22] "estimated"        "average"          "income"          
#>  [25] "by"               "gender"           "estimated"       
#>  [28] "average"          "income"           "by"              
#>  [31] "gender"           "6"                "6.5"             
#>  [34] "5.5"              "6"                "5"               
#>  [37] "5.5"              "4.5"              "5"               
#>  [40] "4"                "4.5"              "3.5"             
#>  [43] "4"                "3"                "3.5"             
#>  [46] "acs"              "cia"              "m4"              
#>  [49] "m5"               "m6"               "m7"              
#>  [52] "acs"              "cia"              "m4"              
#>  [55] "m5"               "m6"               "m7"              
#>  [58] "model"            "specification"    "model"           
#>  [61] "specification"    "4"                "x"               
#>  [64] "10"               "professional"     "degree"          
#>  [67] "x"                "10"               "4"               
#>  [70] "phd"              "degree"           "12"              
#>  [73] "10"               "11"               "9"               
#>  [76] "10"               "estimated"        "average"         
#>  [79] "income"           "by"               "gender"          
#>  [82] "estimated"        "average"          "income"          
#>  [85] "by"               "gender"           "8"               
#>  [88] "9"                "8"                "7"               
#>  [91] "7"                "6"                "6"               
#>  [94] "5"                "5"                "4"               
#>  [97] "4"                "3"                "3"               
#> [100] "acs"              "cia"              "m4"              
#> [103] "m5"               "m6"               "m7"              
#> [106] "acs"              "cia"              "m4"              
#> [109] "m5"               "m6"               "m7"              
#> [112] "model"            "specification"    "model"           
#> [115] "specification"    "figure"           "4"               
#> [118] "multiple"         "imputation"       "point"           
#> [121] "and"              "95"               "confidence"      
#> [124] "interval"         "estimates"        "for"             
#> [127] "the"              "average"          "income"          
#> [130] "for"              "men"              "and"             
#> [133] "women"            "within"           "each"            
#> [136] "education"        "level"            "the"             
#> [139] "acs"              "estimate"         "is"              
#> [142] "the"              "survey"           "weighted"        
#> [145] "estimate"         "based"            "on"              
#> [148] "the"              "reported"         "education"       
#> [151] "level"            "in"               "the"             
#> [154] "2010"             "acs"              "supplementary"   
#> [157] "materials"        "all"              "supplemental"    
#> [160] "files"            "listed"           "below"           
#> [163] "are"              "contained"        "in"              
#> [166] "a"                "single"           "zip"             
#> [169] "file"             "supplementary"    "zip"             
#> [172] "and"              "can"              "be"              
#> [175] "obtained"         "via"              "a"               
#> [178] "single"           "download"         "supplementary"   
#> [181] "results"          "supplementary"    "details"         
#> [184] "and"              "additional"       "results"         
#> [187] "for"              "paper"            "supp"            
#> [190] "material"         "final"            "pdf"             
#> [193] "acs"              "data"             "2010"            
#> [196] "acs"              "data"             "used"            
#> [199] "in"               "the"              "paper"           
#> [202] "acsdata"          "2010standardized" "csv"             
#> [205] "zip"              "26"              
#> 
#> 
#> [[27]]
#> [[27]][[1]]
#>   [1] "matlab"                "code"                  "matlab"               
#>   [4] "files"                 "containing"            "main"                 
#>   [7] "code"                  "maincode"              "edu"                  
#>  [10] "2010app"               "report1993"            "m"                    
#>  [13] "and"                   "helper"                "functions"            
#>  [16] "design"                "m"                     "and"                  
#>  [19] "dirsamp"               "m"                     "as"                   
#>  [22] "well"                  "as"                    "parameter"            
#>  [25] "files"                 "mu"                    "mat"                  
#>  [28] "and"                   "tauspd"                "mat"                  
#>  [31] "code"                  "zip"                   "prior"                
#>  [34] "distributions"         "csv"                   "files"                
#>  [37] "are"                   "provided"              "for"                  
#>  [40] "priors"                "used"                  "in"                   
#>  [43] "model"                 "5"                     "and"                  
#>  [46] "read"                  "in"                    "by"                   
#>  [49] "main"                  "matlab"                "code"                 
#>  [52] "referred"              "to"                    "as"                   
#>  [55] "femalereportprior1993" "csv"                   "malereport"           
#>  [58] "prior1993"             "csv"                   "betareportprior"      
#>  [61] "csv"                   "priors"                "zip"                  
#>  [64] "references"            "abayomi"               "k"                    
#>  [67] "gelman"                "a"                     "and"                  
#>  [70] "levy"                  "m"                     "2008"                 
#>  [73] "diagnostics"           "for"                   "multivariate"         
#>  [76] "impu"                  "tations"               "journal"              
#>  [79] "of"                    "the"                   "royal"                
#>  [82] "statistical"           "society"               "series"               
#>  [85] "c"                     "applied"               "statistics"           
#>  [88] "57"                    "273"                   "291"                  
#>  [91] "black"                 "d"                     "haviland"             
#>  [94] "a"                     "sanders"               "s"                    
#>  [97] "and"                   "taylor"                "l"                    
#> [100] "2006"                  "why"                   "do"                   
#> [103] "minority"              "men"                   "earn"                 
#> [106] "less"                  "a"                     "study"                
#> [109] "of"                    "wage"                  "differentials"        
#> [112] "among"                 "the"                   "highly"               
#> [115] "educated"              "the"                   "review"               
#> [118] "of"                    "economics"             "and"                  
#> [121] "statistics"            "88"                    "300"                  
#> [124] "313"                   "black"                 "d"                    
#> [127] "sanders"               "s"                     "and"                  
#> [130] "taylor"                "l"                     "2003"                 
#> [133] "measurement"           "of"                    "higher"               
#> [136] "education"             "in"                    "the"                  
#> [139] "census"                "and"                   "current"              
#> [142] "population"            "survey"                "journal"              
#> [145] "of"                    "the"                   "american"             
#> [148] "statistical"           "association"           "98"                   
#> [151] "545"                   "554"                   "black"                
#> [154] "d"                     "a"                     "haviland"             
#> [157] "a"                     "m"                     "sanders"              
#> [160] "s"                     "g"                     "and"                  
#> [163] "taylor"                "l"                     "j"                    
#> [166] "2008"                  "gender"                "wage"                 
#> [169] "disparities"           "among"                 "the"                  
#> [172] "highly"                "educated"              "journal"              
#> [175] "of"                    "human"                 "resources"            
#> [178] "43"                    "630"                   "659"                  
#> [181] "carrig"                "m"                     "manrique"             
#> [184] "vallier"               "d"                     "ranby"                
#> [187] "k"                     "reiter"                "j"                    
#> [190] "p"                     "and"                   "hoyle"                
#> [193] "r"                     "2015"                  "a"                    
#> [196] "multiple"              "imputation"            "based"                
#> [199] "method"                "for"                   "the"                  
#> [202] "retrospective"         "harmonization"         "of"                   
#> [205] "data"                  "sets"                  "multivariate"         
#> [208] "behavioral"            "research"              "50"                   
#> [211] "383"                   "397"                   "curran"               
#> [214] "p"                     "j"                     "and"                  
#> [217] "hussong"               "a"                     "m"                    
#> [220] "2009"                  "integrative"           "data"                 
#> [223] "analysis"              "the"                   "simultaneous"         
#> [226] "analysis"              "of"                    "multiple"             
#> [229] "data"                  "sets"                  "psychological"        
#> [232] "methods"               "14"                    "81"                   
#> [235] "100"                   "d’orazio"              "m"                    
#> [238] "di"                    "zio"                   "m"                    
#> [241] "and"                   "scanu"                 "m"                    
#> [244] "2006"                  "statistical"           "matching"             
#> [247] "theory"                "and"                   "practice"             
#> [250] "hoboken"               "nj"                    "wiley"                
#> [253] "dunson"                "d"                     "b"                    
#> [256] "and"                   "xing"                  "c"                    
#> [259] "2009"                  "nonparametric"         "bayes"                
#> [262] "modeling"              "of"                    "multivariate"         
#> [265] "categorical"           "data"                  "journal"              
#> [268] "of"                    "the"                   "american"             
#> [271] "statistical"           "association"           "104"                  
#> [274] "1042"                  "1051"                  "fesco"                
#> [277] "r"                     "s"                     "frase"                
#> [280] "m"                     "j"                     "and"                  
#> [283] "kannankutty"           "n"                     "2012"                 
#> [286] "using"                 "the"                   "american"             
#> [289] "commu"                 "nity"                  "survey"               
#> [292] "as"                    "the"                   "sampling"             
#> [295] "frame"                 "for"                   "the"                  
#> [298] "national"              "survey"                "of"                   
#> [301] "college"               "graduates"             "working"              
#> [304] "paper"                 "ncses"                 "12"                   
#> [307] "201"                   "national"              "science"              
#> [310] "foundation"            "national"              "center"               
#> [313] "for"                   "science"               "and"                  
#> [316] "engineering"           "statistics"            "arlington"            
#> [319] "va"                    "27"                   
#> 
#> 
#> [[28]]
#> [[28]][[1]]
#>   [1] "finamore"       "j"              "2013"           "national"      
#>   [5] "survey"         "of"             "college"        "graduates"     
#>   [9] "about"          "the"            "survey"         "na"            
#>  [13] "tional"         "center"         "for"            "science"       
#>  [17] "and"            "engineering"    "statistics"     "fosdick"       
#>  [21] "b"              "k"              "deyoreo"        "m"             
#>  [25] "and"            "reiter"         "j"              "p"             
#>  [29] "2016"           "categorical"    "data"           "fusion"        
#>  [33] "using"          "auxiliary"      "information"    "annals"        
#>  [37] "of"             "applied"        "statistics"     "to"            
#>  [41] "appear"         "he"             "y"              "landrum"       
#>  [45] "m"              "b"              "and"            "zaslavksy"     
#>  [49] "a"              "m"              "2014"           "combining"     
#>  [53] "information"    "from"           "two"            "data"          
#>  [57] "sources"        "with"           "misreporting"   "and"           
#>  [61] "incompleteness" "to"             "assess"         "hospice"       
#>  [65] "use"            "among"          "cancer"         "patients"      
#>  [69] "a"              "multiple"       "imputation"     "appraoch"      
#>  [73] "statistics"     "in"             "medicine"       "33"            
#>  [77] "3710"           "3724"           "hirano"         "k"             
#>  [81] "imbens"         "g"              "ridder"         "g"             
#>  [85] "and"            "rubin"          "d"              "2001"          
#>  [89] "combining"      "panel"          "data"           "sets"          
#>  [93] "with"           "attrition"      "and"            "refreshment"   
#>  [97] "samples"        "econometrica"   "69"             "1645"          
#> [101] "1659"           "kim"            "h"              "j"             
#> [105] "cox"            "l"              "h"              "karr"          
#> [109] "a"              "f"              "reiter"         "j"             
#> [113] "p"              "and"            "wang"           "q"             
#> [117] "2015"           "simultane"      "ous"            "edit"          
#> [121] "imputation"     "for"            "continuous"     "microdata"     
#> [125] "journal"        "of"             "the"            "american"      
#> [129] "statistical"    "association"    "110"            "987"           
#> [133] "999"            "lohr"           "s"              "l"             
#> [137] "2010"           "sampling"       "design"         "and"           
#> [141] "analysis"       "boston"         "ma"             "brooks"        
#> [145] "cole"           "2nd"            "ed"             "manrique"      
#> [149] "vallier"        "d"              "and"            "reiter"        
#> [153] "j"              "p"              "2016"           "bayesian"      
#> [157] "simultaneous"   "edit"           "and"            "impu"          
#> [161] "tation"         "for"            "multivariate"   "categorical"   
#> [165] "data"           "journal"        "of"             "the"           
#> [169] "american"       "statistical"    "asso"           "ciation"       
#> [173] "to"             "appear"         "moriarity"      "c"             
#> [177] "and"            "scheuren"       "f"              "2001"          
#> [181] "statistical"    "matching"       "a"              "paradigm"      
#> [185] "for"            "assessing"      "the"            "uncertainty"   
#> [189] "in"             "the"            "procedure"      "journal"       
#> [193] "of"             "official"       "statistics"     "17"            
#> [197] "407"            "422"            "national"       "science"       
#> [201] "foundation"     "1993"           "national"       "survey"        
#> [205] "of"             "college"        "graduates"      "1993"          
#> [209] "http"           "doi"            "org"            "10.3886"       
#> [213] "icpsr06880"     "v1"             "icpsr06880"     "v1"            
#> [217] "ann"            "arbor"          "mi"             "inter"         
#> [221] "university"     "consortium"     "for"            "political"     
#> [225] "and"            "social"         "research"       "distributor"   
#> [229] "2014"           "10"             "02"             "pepe"          
#> [233] "m"              "s"              "1992"           "inference"     
#> [237] "using"          "surrogate"      "outcome"        "data"          
#> [241] "and"            "a"              "validation"     "sample"        
#> [245] "biometrika"     "79"             "355"            "365"           
#> [249] "raghunathan"    "t"              "e"              "2006"          
#> [253] "combining"      "information"    "from"           "multiple"      
#> [257] "surveys"        "for"            "assess"         "ing"           
#> [261] "health"         "disparities"    "allgemeines"    "statistisches" 
#> [265] "archiv"         "90"             "515"            "526"           
#> [269] "rassler"        "s"              "2002"           "statistical"   
#> [273] "matching"       "new"            "york"           "springer"      
#> [277] "reiter"         "j"              "2008"           "multiple"      
#> [281] "imputation"     "when"           "records"        "used"          
#> [285] "for"            "imputation"     "are"            "not"           
#> [289] "used"           "or"             "disseminated"   "for"           
#> [293] "analysis"       "biometrika"     "95"             "933"           
#> [297] "946"            "reiter"         "j"              "p"             
#> [301] "2012"           "bayesian"       "finite"         "population"    
#> [305] "imputation"     "for"            "data"           "fusion"        
#> [309] "statistica"     "sinica"         "22"             "795"           
#> [313] "811"            "rubin"          "d"              "b"             
#> [317] "1986"           "statistical"    "matching"       "using"         
#> [321] "file"           "concatenation"  "with"           "adjusted"      
#> [325] "weights"        "and"            "multiple"       "imputations"   
#> [329] "journal"        "of"             "business"       "economic"      
#> [333] "statistics"     "4"              "87"             "94"            
#> [337] "28"            
#> 
#> 
#> [[29]]
#> [[29]][[1]]
#>   [1] "1987"           "multiple"       "imputation"     "for"           
#>   [5] "nonresponse"    "in"             "surveys"        "new"           
#>   [9] "york"           "john"           "wiley"          "sons"          
#>  [13] "schenker"       "n"              "and"            "raghunathan"   
#>  [17] "t"              "e"              "2007"           "combining"     
#>  [21] "information"    "from"           "multiple"       "surveys"       
#>  [25] "to"             "enhance"        "estimation"     "of"            
#>  [29] "measures"       "of"             "health"         "statistics"    
#>  [33] "in"             "medicine"       "26"             "1802"          
#>  [37] "1811"           "schenker"       "n"              "raghunathan"   
#>  [41] "t"              "e"              "and"            "bondarenko"    
#>  [45] "i"              "2010"           "improving"      "on"            
#>  [49] "analyses"       "of"             "self"           "reported"      
#>  [53] "data"           "in"             "a"              "large"         
#>  [57] "scale"          "health"         "survey"         "by"            
#>  [61] "using"          "information"    "from"           "an"            
#>  [65] "examination"    "based"          "survey"         "statistics"    
#>  [69] "in"             "medicine"       "29"             "533"           
#>  [73] "545"            "schifeling"     "t"              "a"             
#>  [77] "cheng"          "c"              "reiter"         "j"             
#>  [81] "p"              "and"            "hillygus"       "d"             
#>  [85] "s"              "2015"           "accounting"     "for"           
#>  [89] "nonignorable"   "unit"           "nonresponse"    "and"           
#>  [93] "attrition"      "in"             "panel"          "studies"       
#>  [97] "with"           "refreshment"    "samples"        "journal"       
#> [101] "of"             "survey"         "statistics"     "and"           
#> [105] "methodology"    "3"              "265"            "295"           
#> [109] "si"             "y"              "and"            "reiter"        
#> [113] "j"              "2013"           "nonparametric"  "bayesian"      
#> [117] "multiple"       "imputation"     "for"            "incom"         
#> [121] "plete"          "categorical"    "variables"      "in"            
#> [125] "large"          "scale"          "assessment"     "surveys"       
#> [129] "journal"        "of"             "educational"    "and"           
#> [133] "behavioral"     "statistics"     "38"             "499"           
#> [137] "521"            "si"             "y"              "reiter"        
#> [141] "j"              "p"              "and"            "hillygus"      
#> [145] "d"              "s"              "2015"           "semi"          
#> [149] "parametric"     "selection"      "models"         "for"           
#> [153] "potentially"    "non"            "ignorable"      "attrition"     
#> [157] "in"             "panel"          "studies"        "with"          
#> [161] "refreshment"    "samples"        "political"      "analysis"      
#> [165] "23"             "92"             "112"            "siddique"      
#> [169] "j"              "reiter"         "j"              "p"             
#> [173] "brincks"        "a"              "gibbons"        "r"             
#> [177] "d"              "crespi"         "c"              "m"             
#> [181] "and"            "brown"          "c"              "h"             
#> [185] "2015"           "multiple"       "imputation"     "for"           
#> [189] "harmonizing"    "longitudinal"   "non"            "commensurate"  
#> [193] "measures"       "in"             "individual"     "participant"   
#> [197] "data"           "meta"           "analysis"       "statistics"    
#> [201] "in"             "medicine"       "34"             "3399"          
#> [205] "3414"           "tarmast"        "g"              "2001"          
#> [209] "multivariate"   "log"            "normal"         "distribution"  
#> [213] "in"             "international"  "statistical"    "institute"     
#> [217] "seoul"          "53rd"           "session"        "yucel"         
#> [221] "r"              "m"              "and"            "zaslavsky"     
#> [225] "a"              "m"              "2005"           "imputation"    
#> [229] "of"             "binary"         "treatment"      "variables"     
#> [233] "with"           "measurement"    "error"          "in"            
#> [237] "administrative" "data"           "journal"        "of"            
#> [241] "the"            "american"       "statistical"    "association"   
#> [245] "100"            "1123"           "1132"           "29"            
#> 
#> 
#> [[30]]
#> [[30]][[1]]
#>   [1] "table"          "4"              "error"          "rate"          
#>   [5] "estimates"      "from"           "different"      "model"         
#>   [9] "specifications" "models"         "1"              "7"             
#>  [13] "are"            "run"            "for"            "100,000"       
#>  [17] "mcmc"           "iterations"     "we"             "save"          
#>  [21] "m"              "50"             "completed"      "datasets"      
#>  [25] "under"          "each"           "model"          "for"           
#>  [29] "each"           "dataset"        "we"             "compute"       
#>  [33] "the"            "estimated"      "overall"        "error"         
#>  [37] "rate"           "estimated"      "error"          "rate"          
#>  [41] "by"             "gender"         "and"            "imputed"       
#>  [45] "y"              "and"            "associated"     "variances"     
#>  [49] "using"          "ratio"          "estimators"     "that"          
#>  [53] "incorporate"    "the"            "acs"            "final"         
#>  [57] "survey"         "weights"        "estimate"       "estimate"      
#>  [61] "by"             "group"          "overall"        "y"             
#>  [65] "ba"             "y"              "ma"             "y"             
#>  [69] "prof"           "y"              "phd"            "cia"           
#>  [73] "model"          "male"           "37"             "36"            
#>  [77] "37"             "76"             "75"             "76"            
#>  [81] "91"             "91"             "92"             "94"            
#>  [85] "93"             "95"             "57"             "55"            
#>  [89] "58"             "female"         "35"             "35"            
#>  [93] "36"             "72"             "71"             "72"            
#>  [97] "95"             "94"             "95"             "97"            
#> [101] "96"             "97"             "model"          "1"             
#> [105] "male"           "05"             "04"             "06"            
#> [109] "10"             "08"             "11"             "18"            
#> [113] "15"             "21"             "27"             "23"            
#> [117] "31"             "17"             "16"             "19"            
#> [121] "female"         "05"             "05"             "06"            
#> [125] "09"             "08"             "10"             "18"            
#> [129] "15"             "21"             "28"             "24"            
#> [133] "32"             "model"          "2"              "male"          
#> [137] "05"             "04"             "06"             "18"            
#> [141] "16"             "21"             "27"             "18"            
#> [145] "37"             "36"             "30"             "42"            
#> [149] "20"             "18"             "21"             "female"        
#> [153] "05"             "05"             "06"             "12"            
#> [157] "10"             "14"             "26"             "20"            
#> [161] "33"             "41"             "29"             "53"            
#> [165] "model"          "3"              "male"           "05"            
#> [169] "04"             "06"             "09"             "08"            
#> [173] "11"             "17"             "14"             "20"            
#> [177] "25"             "21"             "30"             "17"            
#> [181] "16"             "19"             "female"         "05"            
#> [185] "05"             "06"             "09"             "08"            
#> [189] "10"             "17"             "14"             "20"            
#> [193] "26"             "21"             "31"             "model"         
#> [197] "4"              "male"           "05"             "04"            
#> [201] "06"             "19"             "16"             "23"            
#> [205] "36"             "26"             "46"             "36"            
#> [209] "27"             "45"             "22"             "20"            
#> [213] "24"             "female"         "09"             "08"            
#> [217] "10"             "14"             "11"             "17"            
#> [221] "52"             "44"             "59"             "55"            
#> [225] "40"             "70"             "model"          "5"             
#> [229] "male"           "07"             "06"             "08"            
#> [233] "19"             "16"             "22"             "23"            
#> [237] "14"             "32"             "34"             "27"            
#> [241] "41"             "22"             "20"             "24"            
#> [245] "female"         "09"             "08"             "10"            
#> [249] "12"             "09"             "15"             "50"            
#> [253] "43"             "57"             "31"             "17"            
#> [257] "46"             "model"          "6"              "male"          
#> [261] "05"             "05"             "05"             "09"            
#> [265] "08"             "10"             "10"             "09"            
#> [269] "11"             "10"             "09"             "11"            
#> [273] "16"             "14"             "17"             "female"        
#> [277] "05"             "04"             "05"             "06"            
#> [281] "05"             "07"             "16"             "14"            
#> [285] "18"             "07"             "06"             "09"            
#> [289] "model"          "7"              "male"           "01"            
#> [293] "01"             "01"             "01"             "00"            
#> [297] "01"             "00"             "00"             "01"            
#> [301] "01"             "00"             "01"             "11"            
#> [305] "09"             "13"             "female"         "01"            
#> [309] "01"             "01"             "01"             "01"            
#> [313] "01"             "01"             "00"             "01"            
#> [317] "01"             "00"             "01"             "30"            
#> 
#> 
#> [[31]]
#> [[31]][[1]]
#>   [1] "table"         "5"             "estimated"     "mean"         
#>   [5] "and"           "95"            "confidence"    "interval"     
#>   [9] "of"            "reporting"     "probabilities" "under"        
#>  [13] "model"         "2"             "and"           "reporting"    
#>  [17] "probabilities" "by"            "gender"        "under"        
#>  [21] "model"         "4"             "z"             "ba"           
#>  [25] "z"             "ma"            "z"             "prof"         
#>  [29] "z"             "phd"           "y"             "ba"           
#>  [33] "model"         "2"             "95"            "87"           
#>  [37] "1.00"          "04"            "00"            "11"           
#>  [41] "01"            "00"            "03"            "model"        
#>  [45] "4"             "male"          "96"            "90"           
#>  [49] "1.00"          "02"            "00"            "07"           
#>  [53] "02"            "00"            "05"            "model"        
#>  [57] "4"             "female"        "67"            "58"           
#>  [61] "76"            "30"            "22"            "38"           
#>  [65] "03"            "00"            "07"            "y"            
#>  [69] "ma"            "model"         "2"             "02"           
#>  [73] "00"            "06"            "51"            "43"           
#>  [77] "59"            "47"            "39"            "55"           
#>  [81] "model"         "4"             "male"          "04"           
#>  [85] "00"            "11"            "57"            "48"           
#>  [89] "66"            "39"            "31"            "47"           
#>  [93] "model"         "4"             "female"        "11"           
#>  [97] "00"            "25"            "39"            "26"           
#> [101] "52"            "50"            "40"            "61"           
#> [105] "y"             "prof"          "model"         "2"            
#> [109] "05"            "00"            "16"            "69"           
#> [113] "54"            "83"            "26"            "14"           
#> [117] "38"            "model"         "4"             "male"         
#> [121] "02"            "00"            "06"            "69"           
#> [125] "44"            "94"            "29"            "04"           
#> [129] "54"            "model"         "4"             "female"       
#> [133] "91"            "79"            "1.00"          "06"           
#> [137] "00"            "16"            "04"            "00"           
#> [141] "10"            "y"             "phd"           "model"        
#> [145] "2"             "01"            "00"            "04"           
#> [149] "39"            "15"            "63"            "60"           
#> [153] "36"            "83"            "model"         "4"            
#> [157] "male"          "01"            "00"            "05"           
#> [161] "21"            "02"            "39"            "78"           
#> [165] "60"            "96"            "model"         "4"            
#> [169] "female"        "10"            "00"            "30"           
#> [173] "77"            "50"            "1.00"          "13"           
#> [177] "00"            "34"            "y"             "none"         
#> [181] "model"         "2"             "95"            "95"           
#> [185] "96"            "03"            "03"            "04"           
#> [189] "01"            "01"            "01"            "00"           
#> [193] "00"            "00"            "model"         "4"            
#> [197] "male"          "97"            "96"            "97"           
#> [201] "03"            "02"            "03"            "01"           
#> [205] "00"            "01"            "00"            "00"           
#> [209] "00"            "model"         "4"             "female"       
#> [213] "96"            "95"            "97"            "04"           
#> [217] "03"            "05"            "00"            "00"           
#> [221] "00"            "00"            "00"            "00"           
#> [225] "31"           
#> 
#>