[
    {
        "key": "ZXS8NHA8",
        "version": 4795,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/ZXS8NHA8",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/ZXS8NHA8",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/G3UAXMDM",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 2592,
                "username": "wbl2745",
                "name": "William Lund",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/wbl2745",
                        "type": "text/html"
                    }
                }
            },
            "numChildren": 0
        },
        "data": {
            "key": "ZXS8NHA8",
            "version": 4795,
            "parentItem": "G3UAXMDM",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "ACM Full Text PDF",
            "accessDate": "2009-11-27T18:54:30Z",
            "url": "http://portal.acm.org/ft_gateway.cfm?id=1555437&type=pdf&coll=portal&dl=ACM&CFID=65328952&CFTOKEN=85749438",
            "note": "",
            "contentType": "application/pdf",
            "charset": "",
            "filename": "Lund and Ringger - 2009 - Improving optical character recognition through ef.pdf",
            "md5": "8307d3b390cc1f52325368a9cc944577",
            "mtime": 1259348094000,
            "tags": [],
            "relations": {},
            "dateAdded": "2011-01-11T22:44:50Z",
            "dateModified": "2011-01-11T22:44:50Z"
        }
    },
    {
        "key": "7VUAR8BR",
        "version": 4795,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/7VUAR8BR",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/7VUAR8BR",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/G3UAXMDM",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 2592,
                "username": "wbl2745",
                "name": "William Lund",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/wbl2745",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "7VUAR8BR",
            "version": 4795,
            "parentItem": "G3UAXMDM",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "ACM Snapshot",
            "accessDate": "2009-11-27T18:53:57Z",
            "url": "http://portal.acm.org/citation.cfm?id=1555400.1555437&coll=portal&dl=ACM&CFID=65328952&CFTOKEN=85749438",
            "note": "",
            "contentType": "text/html",
            "charset": "utf-8",
            "filename": "citation.html",
            "md5": "b98ff94c8c417e745b831b2c2f3770bb",
            "mtime": 1259348037000,
            "tags": [],
            "relations": {},
            "dateAdded": "2011-01-11T22:44:50Z",
            "dateModified": "2011-01-11T22:44:50Z"
        }
    },
    {
        "key": "G3UAXMDM",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/G3UAXMDM",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/G3UAXMDM",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 2592,
                "username": "wbl2745",
                "name": "William Lund",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/wbl2745",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Lund and Ringger",
            "parsedDate": "2009",
            "numChildren": 2
        },
        "data": {
            "key": "G3UAXMDM",
            "version": 1,
            "itemType": "conferencePaper",
            "title": "Improving Optical Character Recognition through Efficient Multiple System Alignment",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "William B.",
                    "lastName": "Lund"
                },
                {
                    "creatorType": "author",
                    "firstName": "Eric K.",
                    "lastName": "Ringger"
                }
            ],
            "abstractNote": "Individual optical character recognition (OCR) engines vary in the types of errors they commit in recognizing text, particularly poor quality text. By aligning the output of multiple OCR engines and taking advantage of the differences between them, the error rate based on the aligned lattice of recognized words is significantly lower than the individual OCR word error rates. This lattice error rate constitutes a lower bound among aligned alternatives from the OCR output. Results from a collection of poor quality mid-twentieth century typewritten documents demonstrate an average reduction of 55.0% in the error rate of the lattice of alternatives and a realized word error rate (WER) reduction of 35.8% in a dictionary-based selection process. As an important precursor, an innovative admissible heuristic for the A* algorithm is developed, which results in a significant reduction in state space exploration to identify all optimal alignments of the OCR text output, a necessary step toward the construction of the word hypothesis lattice. On average 0.0079% of the state space is explored to identify all optimal alignments of the documents.",
            "proceedingsTitle": "Proceedings of the 9th ACM/IEEE-CS joint conference on Digital libraries",
            "conferenceName": "",
            "publisher": "ACM",
            "place": "Austin, TX, USA",
            "date": "2009",
            "eventPlace": "",
            "volume": "",
            "issue": "",
            "numberOfVolumes": "",
            "pages": "231-240",
            "series": "",
            "seriesNumber": "",
            "DOI": "10.1145/1555400.1555437",
            "ISBN": "978-1-60558-322-8",
            "citationKey": "",
            "url": "http://portal.acm.org/citation.cfm?id=1555400.1555437&coll=portal&dl=ACM&CFID=65328952&CFTOKEN=85749438",
            "accessDate": "2009-11-27T18:53:54Z",
            "ISSN": "",
            "archive": "ACM",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "a* algorithm",
                    "type": 1
                },
                {
                    "tag": "ocr error rate reduction",
                    "type": 1
                },
                {
                    "tag": "text alignment",
                    "type": 1
                }
            ],
            "collections": [
                "J7QCXB2X"
            ],
            "relations": {},
            "dateAdded": "2011-01-11T22:44:50Z",
            "dateModified": "2011-01-11T22:44:50Z"
        }
    },
    {
        "key": "SE67G9NT",
        "version": 4795,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/SE67G9NT",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/SE67G9NT",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/FENZVU77",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 2592,
                "username": "wbl2745",
                "name": "William Lund",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/wbl2745",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "SE67G9NT",
            "version": 4795,
            "parentItem": "FENZVU77",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "ACL Anthology » D10",
            "accessDate": "2010-12-28T21:55:26Z",
            "url": "http://aclweb.org/anthology/D/D10/",
            "note": "",
            "contentType": "text/html",
            "charset": "utf-8",
            "filename": "D10.html",
            "md5": "c071a0871d8e21ec274051b9c743f1f9",
            "mtime": 1293573326000,
            "tags": [],
            "relations": {},
            "dateAdded": "2011-01-11T22:43:57Z",
            "dateModified": "2011-01-11T22:43:57Z"
        }
    },
    {
        "key": "WXC9CEBH",
        "version": 4795,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/WXC9CEBH",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/WXC9CEBH",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/FENZVU77",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 2592,
                "username": "wbl2745",
                "name": "William Lund",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/wbl2745",
                        "type": "text/html"
                    }
                }
            },
            "numChildren": 0
        },
        "data": {
            "key": "WXC9CEBH",
            "version": 4795,
            "parentItem": "FENZVU77",
            "itemType": "attachment",
            "linkMode": "imported_file",
            "title": "D10-1024.pdf",
            "accessDate": "",
            "url": "",
            "note": "",
            "contentType": "application/pdf",
            "charset": "",
            "filename": "D10-1024.pdf",
            "md5": "7a842d7d17b005a4dea41ab42576c123",
            "mtime": 1293573256000,
            "tags": [],
            "relations": {},
            "dateAdded": "2011-01-11T22:43:57Z",
            "dateModified": "2011-01-11T22:43:57Z"
        }
    },
    {
        "key": "2JKXNGQV",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/2JKXNGQV",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/2JKXNGQV",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/FENZVU77",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 2592,
                "username": "wbl2745",
                "name": "William Lund",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/wbl2745",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "2JKXNGQV",
            "version": 1,
            "parentItem": "FENZVU77",
            "itemType": "attachment",
            "linkMode": "linked_url",
            "title": "ACL Anthology » D10",
            "accessDate": "2010-12-28T21:55:46Z",
            "url": "http://aclweb.org/anthology/D/D10/",
            "note": "",
            "contentType": "text/html",
            "charset": "utf-8",
            "tags": [],
            "relations": {},
            "dateAdded": "2011-01-11T22:43:57Z",
            "dateModified": "2011-01-11T22:43:57Z"
        }
    },
    {
        "key": "FENZVU77",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/FENZVU77",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/FENZVU77",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 2592,
                "username": "wbl2745",
                "name": "William Lund",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/wbl2745",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Walker et al.",
            "parsedDate": "2010-10-09",
            "numChildren": 3
        },
        "data": {
            "key": "FENZVU77",
            "version": 1,
            "itemType": "conferencePaper",
            "title": "Evaluating Models of Latent Document Semantics in the Presence of OCR Errors",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Daniel D.",
                    "lastName": "Walker"
                },
                {
                    "creatorType": "author",
                    "firstName": "William B.",
                    "lastName": "Lund"
                },
                {
                    "creatorType": "author",
                    "firstName": "Eric K.",
                    "lastName": "Ringger"
                }
            ],
            "abstractNote": "Models of latent document semantics such as the mixture of multinomials model and La- tent Dirichlet Allocation have received sub- stantial attention for their ability to discover topical semantics in large collections of text. In an effort to apply such models to noisy optical character recognition (OCR) text out- put, we endeavor to understand the effect that character-level noise can have on unsu- pervised topic modeling. We show the ef- fects both with document-level topic analy- sis (document clustering) and with word-level topic analysis (LDA) on both synthetic and real-world OCR data. As expected, experi- mental results show that performance declines as word error rates increase. Common tech- niques for alleviating these problems, such as filtering low-frequency words, are successful in enhancing model quality, but exhibit fail- ure trends similar to models trained on unpro- cessed OCR output in the case of LDA. To our knowledge, this study is the first of its kind.",
            "proceedingsTitle": "Proceedings of the Conference on Empirical Methods in Natural Language Processing 2010 (EMNLP 2010)",
            "conferenceName": "Conference on Empirical Methods in Natural Language Processing 2010 (EMNLP 2010)",
            "publisher": "",
            "place": "Cambridge, Mass.",
            "date": "Oct 9, 2010",
            "eventPlace": "",
            "volume": "",
            "issue": "",
            "numberOfVolumes": "",
            "pages": "",
            "series": "",
            "seriesNumber": "",
            "DOI": "",
            "ISBN": "",
            "citationKey": "",
            "url": "http://www.aclweb.org/anthology/D10-1024",
            "accessDate": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [
                "J7QCXB2X"
            ],
            "relations": {},
            "dateAdded": "2011-01-11T22:43:57Z",
            "dateModified": "2011-01-11T22:43:57Z"
        }
    },
    {
        "key": "D78SQ8VM",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/D78SQ8VM",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/D78SQ8VM",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/GVMSI6NI",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 110125,
                "username": "ringger",
                "name": "",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/ringger",
                        "type": "text/html"
                    }
                }
            },
            "lastModifiedByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "D78SQ8VM",
            "version": 1,
            "parentItem": "GVMSI6NI",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "DBLP Record 'conf/kdd/WalkerR08'",
            "accessDate": "2010-01-19T22:45:02Z",
            "url": "http://dblp.uni-trier.de/rec/bibtex/conf/kdd/WalkerR08",
            "note": "",
            "contentType": "text/html",
            "charset": "windows-1252",
            "filename": "WalkerR08.html",
            "md5": "348211d0ce12c9fbe95fa05e240d205c",
            "mtime": 2147483647,
            "tags": [],
            "relations": {},
            "dateAdded": "2010-01-19T22:45:02Z",
            "dateModified": "2010-02-16T03:51:02Z"
        }
    },
    {
        "key": "EWJP63IW",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/EWJP63IW",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/EWJP63IW",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/B8AKXCP8",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "EWJP63IW",
            "version": 1,
            "parentItem": "B8AKXCP8",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "ACM Snapshot",
            "accessDate": "2009-10-13T15:57:30Z",
            "url": "http://portal.acm.org/citation.cfm?id=276330",
            "note": "",
            "contentType": "text/html",
            "charset": "utf-8",
            "filename": "citation.html",
            "md5": null,
            "mtime": 2147483647,
            "tags": [],
            "relations": {},
            "dateAdded": "2010-01-12T17:08:27Z",
            "dateModified": "2010-01-22T14:17:10Z"
        }
    },
    {
        "key": "U53GTT5Z",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/U53GTT5Z",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/U53GTT5Z",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/PAKPU8Q9",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "U53GTT5Z",
            "version": 1,
            "parentItem": "PAKPU8Q9",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "ACM Snapshot",
            "accessDate": "2009-10-13T15:57:30Z",
            "url": "http://portal.acm.org/citation.cfm?id=276330",
            "note": "",
            "contentType": "text/html",
            "charset": "utf-8",
            "filename": "citation.html",
            "md5": null,
            "mtime": 2147483647,
            "tags": [],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/PFWFCDDT"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-22T14:17:10Z"
        }
    },
    {
        "key": "N695N5HD",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/N695N5HD",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/N695N5HD",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/2GKQAFMW",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "N695N5HD",
            "version": 1,
            "parentItem": "2GKQAFMW",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "ACM Snapshot",
            "accessDate": "2009-10-22T16:29:35Z",
            "url": "http://portal.acm.org/citation.cfm?id=1559845.1559925",
            "note": "",
            "contentType": "text/html",
            "charset": "utf-8",
            "filename": "citation.html",
            "md5": null,
            "mtime": 2147483647,
            "tags": [],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/D8P3FIC9"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-22T14:17:10Z"
        }
    },
    {
        "key": "GVMSI6NI",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/GVMSI6NI",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/GVMSI6NI",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 110125,
                "username": "ringger",
                "name": "",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/ringger",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Walker and Ringger",
            "parsedDate": "2008",
            "numChildren": 1
        },
        "data": {
            "key": "GVMSI6NI",
            "version": 1,
            "itemType": "conferencePaper",
            "title": "Model-based document clustering with a collapsed gibbs sampler",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Daniel David",
                    "lastName": "Walker"
                },
                {
                    "creatorType": "author",
                    "firstName": "Eric K.",
                    "lastName": "Ringger"
                }
            ],
            "abstractNote": "",
            "proceedingsTitle": "Proceeding of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining - KDD '08",
            "conferenceName": "Proceeding of the 14th ACM SIGKDD international conference",
            "publisher": "",
            "place": "Las Vegas, Nevada, USA",
            "date": "2008",
            "eventPlace": "",
            "volume": "",
            "issue": "",
            "numberOfVolumes": "",
            "pages": "704",
            "series": "",
            "seriesNumber": "",
            "DOI": "10.1145/1401890.1401975",
            "ISBN": "",
            "citationKey": "",
            "url": "http://dblp.uni-trier.de/rec/bibtex/conf/kdd/WalkerR08",
            "accessDate": "2010-01-19T22:45:01Z",
            "ISSN": "",
            "archive": "CrossRef",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [
                "KX7BJAKM"
            ],
            "relations": {},
            "dateAdded": "2010-01-19T22:45:01Z",
            "dateModified": "2010-01-19T22:45:01Z"
        }
    },
    {
        "key": "TG4VKKAJ",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/TG4VKKAJ",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/TG4VKKAJ",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/STEGVCTM",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "TG4VKKAJ",
            "version": 1,
            "parentItem": "STEGVCTM",
            "itemType": "attachment",
            "linkMode": "linked_url",
            "title": "li07.pdf (application/pdf Object)",
            "accessDate": "2009-11-30T21:00:49Z",
            "url": "http://nlp.cs.nyu.edu/sekine/papers/li07.pdf",
            "note": "",
            "contentType": "application/pdf",
            "charset": "windows-1252",
            "tags": [],
            "relations": {},
            "dateAdded": "2010-01-19T22:31:37Z",
            "dateModified": "2010-01-19T22:31:37Z"
        }
    },
    {
        "key": "STEGVCTM",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/STEGVCTM",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/STEGVCTM",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Nadeau and Sekine",
            "parsedDate": "2007-01",
            "numChildren": 1
        },
        "data": {
            "key": "STEGVCTM",
            "version": 1,
            "itemType": "journalArticle",
            "title": "A survey of named entity recognition and classification",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "David",
                    "lastName": "Nadeau"
                },
                {
                    "creatorType": "author",
                    "firstName": "Satoshi",
                    "lastName": "Sekine"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "Linguisticae Investigationes",
            "publisher": "",
            "place": "",
            "date": "January 2007",
            "volume": "30",
            "issue": "1",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "3-26",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "",
            "citationKey": "",
            "url": "http://www.ingentaconnect.com/content/jbp/li/2007/00000030/00000001/art00002",
            "accessDate": "2009-11-24T21:28:54Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "0378-4169",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "CiteULike",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "named entity recognition (NER)"
                }
            ],
            "collections": [
                "J7QCXB2X"
            ],
            "relations": {},
            "dateAdded": "2010-01-19T22:31:37Z",
            "dateModified": "2010-01-19T22:31:37Z"
        }
    },
    {
        "key": "5VP4BFK4",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/5VP4BFK4",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/5VP4BFK4",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Embley et al.",
            "parsedDate": "1999-11",
            "numChildren": 3
        },
        "data": {
            "key": "5VP4BFK4",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Conceptual-model-based data extraction from multiple-record Web pages",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "D. W.",
                    "lastName": "Embley"
                },
                {
                    "creatorType": "author",
                    "firstName": "D. M.",
                    "lastName": "Campbell"
                },
                {
                    "creatorType": "author",
                    "firstName": "Y. S.",
                    "lastName": "Jiang"
                },
                {
                    "creatorType": "author",
                    "firstName": "S. W.",
                    "lastName": "Liddle"
                },
                {
                    "creatorType": "author",
                    "firstName": "D. W.",
                    "lastName": "Lonsdale"
                },
                {
                    "creatorType": "author",
                    "firstName": "Y. -K.",
                    "lastName": "Ng"
                },
                {
                    "creatorType": "author",
                    "firstName": "R. D.",
                    "lastName": "Smith"
                }
            ],
            "abstractNote": "Electronically available data on the Web is exploding at an ever increasing pace. Much of this data is unstructured, which makes searching hard and traditional database querying impossible. Many Web documents, however, contain an abundance of recognizable constants that together describe the essence of a document's content. For these kinds of data-rich, multiple-record documents (e.g., advertisements, movie reviews, weather reports, travel information, sports summaries, financial statements, obituaries, and many others) we can apply a conceptual-modeling approach to extract and structure data automatically. The approach is based on an ontology - a conceptual model instance - that describes the data of interest, including relationships, lexical appearance, and context keywords. By parsing the ontology, we can automatically produce a database scheme and recognizers for constants and keywords, and then invoke routines to recognize and extract data from unstructured documents and structure it according to the generated database scheme. Experiments show that it is possible to achieve good recall and precision ratios for documents that are rich in recognizable constants and narrow in ontological breadth. Our approach is less labor-intensive than other approaches that manually or semiautomatically generate wrappers, and it is generally insensitive to changes in Web-page format.",
            "publicationTitle": "Data & Knowledge Engineering",
            "publisher": "",
            "place": "",
            "date": "November 1999",
            "volume": "31",
            "issue": "3",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "227-251",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1016/S0169-023X(99)00027-0",
            "citationKey": "",
            "url": "http://www.sciencedirect.com/science/article/B6TYX-3XJKBTJ-2/2/bf114bfa25560bf29d4c171bb2340af1",
            "accessDate": "2009-11-19T22:44:12Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "0169-023X",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "ScienceDirect",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "David W. Embley"
                },
                {
                    "tag": "World Wide Web (WWW)"
                },
                {
                    "tag": "information extraction (IE)"
                },
                {
                    "tag": "obituaries"
                },
                {
                    "tag": "ontologies"
                },
                {
                    "tag": "unstructured data"
                }
            ],
            "collections": [
                "KX7BJAKM"
            ],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/4TENDKHR"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-12T17:11:50Z"
        }
    },
    {
        "key": "8IETAMJB",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/8IETAMJB",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/8IETAMJB",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Miller et al.",
            "parsedDate": "2000",
            "numChildren": 2
        },
        "data": {
            "key": "8IETAMJB",
            "version": 1,
            "itemType": "conferencePaper",
            "title": "Named entity extraction from noisy input: speech and OCR",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "D.",
                    "lastName": "Miller"
                },
                {
                    "creatorType": "author",
                    "firstName": "S.",
                    "lastName": "Boisen"
                },
                {
                    "creatorType": "author",
                    "firstName": "R.",
                    "lastName": "Schwartz"
                },
                {
                    "creatorType": "author",
                    "firstName": "R.",
                    "lastName": "Stone"
                },
                {
                    "creatorType": "author",
                    "firstName": "R.",
                    "lastName": "Weischedel"
                }
            ],
            "abstractNote": "",
            "proceedingsTitle": "Proceedings of ANLP-NAACL 2000",
            "conferenceName": "",
            "publisher": "",
            "place": "",
            "date": "2000",
            "eventPlace": "",
            "volume": "",
            "issue": "",
            "numberOfVolumes": "",
            "pages": "316-324",
            "series": "",
            "seriesNumber": "",
            "DOI": "",
            "ISBN": "",
            "citationKey": "",
            "url": "",
            "accessDate": "",
            "ISSN": "",
            "archive": "Google Scholar",
            "archiveLocation": "",
            "shortTitle": "Named entity extraction from noisy input",
            "language": "",
            "libraryCatalog": "",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "**"
                },
                {
                    "tag": "OCR Extraction"
                },
                {
                    "tag": "Read"
                }
            ],
            "collections": [
                "KX7BJAKM"
            ],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/7IKNWWBC"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-12T17:11:47Z"
        }
    },
    {
        "key": "2SCX8EFZ",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/2SCX8EFZ",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/2SCX8EFZ",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Kolak and Resnik",
            "parsedDate": "2002",
            "numChildren": 2
        },
        "data": {
            "key": "2SCX8EFZ",
            "version": 1,
            "itemType": "conferencePaper",
            "title": "OCR error correction using a noisy channel model",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Okan",
                    "lastName": "Kolak"
                },
                {
                    "creatorType": "author",
                    "firstName": "Philip",
                    "lastName": "Resnik"
                }
            ],
            "abstractNote": "",
            "proceedingsTitle": "Proceedings of the second international conference on Human Language Technology Research",
            "conferenceName": "",
            "publisher": "Morgan Kaufmann Publishers Inc.",
            "place": "San Diego, California",
            "date": "2002",
            "eventPlace": "",
            "volume": "",
            "issue": "",
            "numberOfVolumes": "",
            "pages": "257-262",
            "series": "",
            "seriesNumber": "",
            "DOI": "",
            "ISBN": "",
            "citationKey": "",
            "url": "",
            "accessDate": "",
            "ISSN": "",
            "archive": "Google Scholar",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "**"
                },
                {
                    "tag": "Noisy Channel"
                },
                {
                    "tag": "Not Read"
                },
                {
                    "tag": "OCR Error Correction"
                },
                {
                    "tag": "OCR Error Modeling"
                },
                {
                    "tag": "Parameter Estimation"
                },
                {
                    "tag": "Pattern Recognition"
                }
            ],
            "collections": [
                "KX7BJAKM"
            ],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/XIJ4M59M"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-12T17:11:42Z"
        }
    },
    {
        "key": "5WU2FUUA",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/5WU2FUUA",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/5WU2FUUA",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Ratinov and Roth",
            "parsedDate": "2009",
            "numChildren": 1
        },
        "data": {
            "key": "5WU2FUUA",
            "version": 1,
            "itemType": "conferencePaper",
            "title": "Design challenges and misconceptions in named entity recognition",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Lev",
                    "lastName": "Ratinov"
                },
                {
                    "creatorType": "author",
                    "firstName": "Dan",
                    "lastName": "Roth"
                }
            ],
            "abstractNote": "We analyze some of the fundamental design challenges and misconceptions that underlie the development of an efficient and robust NER system. In particular, we address issues such as the representation of text chunks, the inference approach needed to combine local NER decisions, the sources of prior knowledge and how to use them within an NER system. In the process of comparing several solutions to these challenges we reach some surprising conclusions, as well as develop an NER system that achieves 90.8 F1 score on the CoNLL-2003 NER shared task, the best reported result for this dataset.",
            "proceedingsTitle": "Proceedings of the Thirteenth Conference on Computational Natural Language Learning",
            "conferenceName": "",
            "publisher": "Association for Computational Linguistics",
            "place": "Boulder, Colorado",
            "date": "2009",
            "eventPlace": "",
            "volume": "",
            "issue": "",
            "numberOfVolumes": "",
            "pages": "147-155",
            "series": "",
            "seriesNumber": "",
            "DOI": "",
            "ISBN": "978-1-932432-29-9",
            "citationKey": "",
            "url": "http://portal.acm.org/citation.cfm?id=1596374.1596399",
            "accessDate": "2009-11-24T21:31:36Z",
            "ISSN": "",
            "archive": "ACM",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [
                "KX7BJAKM"
            ],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/RQWFH3US"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-12T17:11:22Z"
        }
    },
    {
        "key": "2GKQAFMW",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/2GKQAFMW",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/2GKQAFMW",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Wang et al.",
            "parsedDate": "2009",
            "numChildren": 2
        },
        "data": {
            "key": "2GKQAFMW",
            "version": 1,
            "itemType": "conferencePaper",
            "title": "Efficient approximate entity extraction with edit distance constraints",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Wei",
                    "lastName": "Wang"
                },
                {
                    "creatorType": "author",
                    "firstName": "Chuan",
                    "lastName": "Xiao"
                },
                {
                    "creatorType": "author",
                    "firstName": "Xuemin",
                    "lastName": "Lin"
                },
                {
                    "creatorType": "author",
                    "firstName": "Chengqi",
                    "lastName": "Zhang"
                }
            ],
            "abstractNote": "Named entity recognition aims at extracting named entities from unstructured text. A recent trend of named entity recognition is finding approximate matches in the text with respect to a large dictionary of known entities, as the domain knowledge encoded in the dictionary helps to improve the extraction performance.",
            "proceedingsTitle": "Proceedings of the 35th SIGMOD international conference on Management of data",
            "conferenceName": "",
            "publisher": "ACM",
            "place": "Providence, Rhode Island, USA",
            "date": "2009",
            "eventPlace": "",
            "volume": "",
            "issue": "",
            "numberOfVolumes": "",
            "pages": "759-770",
            "series": "",
            "seriesNumber": "",
            "DOI": "10.1145/1559845.1559925",
            "ISBN": "978-1-60558-551-2",
            "citationKey": "",
            "url": "http://portal.acm.org/citation.cfm?id=1559845.1559925",
            "accessDate": "2009-10-22T16:29:32Z",
            "ISSN": "",
            "archive": "ACM",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "**"
                },
                {
                    "tag": "To Read"
                },
                {
                    "tag": "approximate dictionary matching"
                },
                {
                    "tag": "edit distance"
                },
                {
                    "tag": "named entity recognition (NER)"
                }
            ],
            "collections": [
                "KX7BJAKM"
            ],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/R4HG5MXA"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-12T17:11:19Z"
        }
    },
    {
        "key": "SFN4M7SC",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/SFN4M7SC",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/SFN4M7SC",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/B8AKXCP8",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "numChildren": 0
        },
        "data": {
            "key": "SFN4M7SC",
            "version": 1,
            "parentItem": "B8AKXCP8",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "p283-adelberg.pdf",
            "accessDate": "2009-10-13T15:56:57Z",
            "url": "http://delivery.acm.org/10.1145/280000/276330/p283-adelberg.pdf?key1=276330&key2=7219445521&coll=GUIDE&dl=GUIDE&CFID=57532152&CFTOKEN=98434533",
            "note": "",
            "contentType": "application/pdf",
            "charset": "",
            "filename": "p283-adelberg.pdf",
            "md5": "ad45dab4efdd646d6459d3f8a856264f",
            "mtime": 2147483647,
            "tags": [],
            "relations": {},
            "dateAdded": "2010-01-12T17:08:27Z",
            "dateModified": "2010-01-12T17:08:27Z"
        }
    },
    {
        "key": "B8AKXCP8",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/B8AKXCP8",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/B8AKXCP8",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Adelberg",
            "parsedDate": "1998",
            "numChildren": 2
        },
        "data": {
            "key": "B8AKXCP8",
            "version": 1,
            "itemType": "journalArticle",
            "title": "A tool for Semi-automatically Extracting Structured and Semistructured Data from Text Documents",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Brad",
                    "lastName": "Adelberg"
                }
            ],
            "abstractNote": "Often interesting structured or semistructured data is not in database systems but in HTML pages, text files, or on paper. The data in these formats is not usable by standard query processing engines and hence users need a way of extracting data from these sources into a DBMS or of writing wrappers around the sources. This paper describes NoDoSE, the Northwestern Document Structure Extractor, which is an interactive tool for semi-automatically determining the structure of such documents and then extracting their data. Using a GUI, the user hierarchically decomposes the file, outlining its interesting regions and then describing their semantics. This task is expedited by a mining component that attempts to infer the grammar of the file from the information the user has input so far. Once the format of a document has been determined, its data can be extracted into a number of useful forms. This paper describes both the NoDoSE architecture, which can be used as a test bed for structure mining algorithms in general, and the mining algorithms that have been developed by the author. The prototype, which is written in Java, is described and experiences parsing a variety of documents are reported.",
            "publicationTitle": "SIGMOD Rec.",
            "publisher": "",
            "place": "",
            "date": "1998",
            "volume": "27",
            "issue": "2",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "283-294",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1145/276305.276330",
            "citationKey": "",
            "url": "http://portal.acm.org/citation.cfm?id=276330",
            "accessDate": "2009-10-13T15:56:51Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "ACM",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "*"
                },
                {
                    "tag": "OCR Extraction"
                },
                {
                    "tag": "To Read"
                },
                {
                    "tag": "document structure learning"
                },
                {
                    "tag": "information extraction (IE)"
                },
                {
                    "tag": "named entity recognition (NER)"
                }
            ],
            "collections": [
                "KX7BJAKM"
            ],
            "relations": {},
            "dateAdded": "2010-01-12T17:08:27Z",
            "dateModified": "2010-01-12T17:08:27Z"
        }
    },
    {
        "key": "R8PSA3D7",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/R8PSA3D7",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/R8PSA3D7",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/3HZEBMIB",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "R8PSA3D7",
            "version": 1,
            "parentItem": "3HZEBMIB",
            "itemType": "attachment",
            "linkMode": "linked_url",
            "title": "CiteSeerX — Named Entity Extraction from Broadcast News",
            "accessDate": "2009-10-12T18:22:24Z",
            "url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.27.3193",
            "note": "",
            "contentType": "text/html",
            "charset": "utf-8",
            "tags": [],
            "relations": {},
            "dateAdded": "2010-01-12T17:07:46Z",
            "dateModified": "2010-01-12T17:07:46Z"
        }
    },
    {
        "key": "3HZEBMIB",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/3HZEBMIB",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/3HZEBMIB",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Miller et al.",
            "parsedDate": "1999",
            "numChildren": 1
        },
        "data": {
            "key": "3HZEBMIB",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Named Entity Extraction from Broadcast News",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "David",
                    "lastName": "Miller"
                },
                {
                    "creatorType": "author",
                    "firstName": "Richard",
                    "lastName": "Schwartz"
                },
                {
                    "creatorType": "author",
                    "firstName": "Ralph",
                    "lastName": "Weischedel"
                },
                {
                    "creatorType": "author",
                    "firstName": "Rebecca",
                    "lastName": "Stone"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "IN PROCEEDINGS OF THE DARPA BROADCAST NEWS WORKSHOP",
            "publisher": "",
            "place": "",
            "date": "1999",
            "volume": "",
            "issue": "",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "37-40",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "",
            "citationKey": "",
            "url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.27.3193",
            "accessDate": "2009-10-12T18:20:33Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [
                "KX7BJAKM"
            ],
            "relations": {},
            "dateAdded": "2010-01-12T17:07:46Z",
            "dateModified": "2010-01-12T17:07:46Z"
        }
    },
    {
        "key": "ZJNUWCTE",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/ZJNUWCTE",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/ZJNUWCTE",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/8JTR5IJV",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "ZJNUWCTE",
            "version": 1,
            "parentItem": "8JTR5IJV",
            "itemType": "attachment",
            "linkMode": "linked_url",
            "title": "W96-0108.pdf (application/pdf Object)",
            "accessDate": "2009-07-27T16:33:05Z",
            "url": "http://acl.ldc.upenn.edu/W/W96/W96-0108.pdf",
            "note": "",
            "contentType": "application/pdf",
            "charset": "windows-1252",
            "tags": [],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/HRFARDXH"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-06T01:35:54Z"
        }
    },
    {
        "key": "E46HNBAI",
        "version": 1,
        "library": {
            "type": "group",
            "id": 9554,
            "name": "NLP Group at Brigham Young University",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/9554/items/E46HNBAI",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/nlp_group_at_brigham_young_university/items/E46HNBAI",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/9554/items/PAKPU8Q9",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 33222,
                "username": "tpacker",
                "name": "Thomas L. Packer",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/tpacker",
                        "type": "text/html"
                    }
                }
            },
            "numChildren": 0
        },
        "data": {
            "key": "E46HNBAI",
            "version": 1,
            "parentItem": "PAKPU8Q9",
            "itemType": "attachment",
            "linkMode": "imported_url",
            "title": "p283-adelberg.pdf",
            "accessDate": "2009-10-13T15:56:57Z",
            "url": "http://delivery.acm.org/10.1145/280000/276330/p283-adelberg.pdf?key1=276330&key2=7219445521&coll=GUIDE&dl=GUIDE&CFID=57532152&CFTOKEN=98434533",
            "note": "",
            "contentType": "application/pdf",
            "charset": "",
            "filename": "p283-adelberg.pdf",
            "md5": "ad45dab4efdd646d6459d3f8a856264f",
            "mtime": 2147483647,
            "tags": [],
            "relations": {
                "owl:sameAs": "http://zotero.org/groups/9198/items/ZG4IUGIA"
            },
            "dateAdded": "2010-01-06T01:35:54Z",
            "dateModified": "2010-01-06T01:35:54Z"
        }
    }
]