[
    {
        "key": "QSKQBVKN",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/QSKQBVKN",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/QSKQBVKN",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Schbath et al.",
            "parsedDate": "1995",
            "numChildren": 0
        },
        "data": {
            "key": "QSKQBVKN",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Exceptional Motifs in Different Markov Chain Models for a Statistical Analysis of DNA Sequences",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Sophie",
                    "lastName": "Schbath"
                },
                {
                    "creatorType": "author",
                    "firstName": "Bernard",
                    "lastName": "Prum"
                },
                {
                    "creatorType": "author",
                    "firstName": "Elisabeth",
                    "lastName": "De Turckheim"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "Journal of Computational Biology",
            "publisher": "",
            "place": "",
            "date": "01/1995",
            "volume": "2",
            "issue": "3",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "417-437",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Journal of Computational Biology",
            "DOI": "10.1089/cmb.1995.2.417",
            "citationKey": "",
            "url": "http://www.liebertonline.com/doi/abs/10.1089/cmb.1995.2.417",
            "accessDate": "2011-05-09T01:28:05Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "1066-5277",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "CrossRef",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "QADPPZZH",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/QADPPZZH",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/QADPPZZH",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Schbath",
            "parsedDate": "2000",
            "numChildren": 0
        },
        "data": {
            "key": "QADPPZZH",
            "version": 1,
            "itemType": "journalArticle",
            "title": "An Overview on the Distribution of Word Counts in Markov Chains",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Sophie",
                    "lastName": "Schbath"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "Journal of Computational Biology",
            "publisher": "",
            "place": "",
            "date": "02/2000",
            "volume": "7",
            "issue": "1-2",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "193-201",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Journal of Computational Biology",
            "DOI": "10.1089/10665270050081469",
            "citationKey": "",
            "url": "http://www.liebertonline.com/doi/abs/10.1089/10665270050081469",
            "accessDate": "2011-05-09T01:28:05Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "1066-5277",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "CrossRef",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "XV7AHPZT",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/XV7AHPZT",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/XV7AHPZT",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Robin and Schbath",
            "parsedDate": "2001",
            "numChildren": 0
        },
        "data": {
            "key": "XV7AHPZT",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Numerical Comparison of Several Approximations of the Word Count Distribution in Random Sequences",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Stéphane",
                    "lastName": "Robin"
                },
                {
                    "creatorType": "author",
                    "firstName": "Sophie",
                    "lastName": "Schbath"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "Journal of Computational Biology",
            "publisher": "",
            "place": "",
            "date": "09/2001",
            "volume": "8",
            "issue": "4",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "349-359",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Journal of Computational Biology",
            "DOI": "10.1089/106652701752236179",
            "citationKey": "",
            "url": "http://www.liebertonline.com/doi/abs/10.1089/106652701752236179",
            "accessDate": "2011-05-09T01:28:05Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "1066-5277",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "CrossRef",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "DRAMFQMG",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/DRAMFQMG",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/DRAMFQMG",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Reinert et al.",
            "parsedDate": "2000",
            "numChildren": 0
        },
        "data": {
            "key": "DRAMFQMG",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Probabilistic and Statistical Properties of Words: An Overview",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Gesine",
                    "lastName": "Reinert"
                },
                {
                    "creatorType": "author",
                    "firstName": "Sophie",
                    "lastName": "Schbath"
                },
                {
                    "creatorType": "author",
                    "firstName": "Michael S.",
                    "lastName": "Waterman"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "Journal of Computational Biology",
            "publisher": "",
            "place": "",
            "date": "02/2000",
            "volume": "7",
            "issue": "1-2",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "1-46",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Journal of Computational Biology",
            "DOI": "10.1089/10665270050081360",
            "citationKey": "",
            "url": "http://www.liebertonline.com/doi/abs/10.1089/10665270050081360",
            "accessDate": "2011-05-09T01:28:05Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "1066-5277",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "Probabilistic and Statistical Properties of Words",
            "language": "",
            "libraryCatalog": "CrossRef",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "UT4I5TER",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/UT4I5TER",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/UT4I5TER",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Reinert et al.",
            "parsedDate": "2009",
            "numChildren": 0
        },
        "data": {
            "key": "UT4I5TER",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Alignment-Free Sequence Comparison (I): Statistics and Power",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Gesine",
                    "lastName": "Reinert"
                },
                {
                    "creatorType": "author",
                    "firstName": "David",
                    "lastName": "Chew"
                },
                {
                    "creatorType": "author",
                    "firstName": "Fengzhu",
                    "lastName": "Sun"
                },
                {
                    "creatorType": "author",
                    "firstName": "Michael S.",
                    "lastName": "Waterman"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "Journal of Computational Biology",
            "publisher": "",
            "place": "",
            "date": "12/2009",
            "volume": "16",
            "issue": "12",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "1615-1634",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Journal of Computational Biology",
            "DOI": "10.1089/cmb.2009.0198",
            "citationKey": "",
            "url": "http://www.liebertonline.com/doi/abs/10.1089/cmb.2009.0198",
            "accessDate": "2011-05-09T01:28:05Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "1066-5277",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "Alignment-Free Sequence Comparison (I)",
            "language": "",
            "libraryCatalog": "CrossRef",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "2VCGBXH9",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/2VCGBXH9",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/2VCGBXH9",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Qi et al.",
            "parsedDate": "2004-01-01",
            "numChildren": 0
        },
        "data": {
            "key": "2VCGBXH9",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Whole Proteome Prokaryote Phylogeny Without Sequence Alignment: A K-String Composition Approach",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Ji",
                    "lastName": "Qi"
                },
                {
                    "creatorType": "author",
                    "firstName": "Bin",
                    "lastName": "Wang"
                },
                {
                    "creatorType": "author",
                    "firstName": "Bai-lin",
                    "lastName": "Hao"
                }
            ],
            "abstractNote": "A systematic way of inferring evolutionary relatedness of microbial organisms from the oligopeptide content, i.e., frequency of amino acid K-strings in their complete proteomes, is proposed. The new method circumvents the ambiguity of choosing the genes for phylogenetic reconstruction and avoids the necessity of aligning sequences of essentially different length and gene content. The only “parameter” in the method is the length K of the oligopeptides, which serves to tune the “resolution power” of the method. The topology of the trees converges with K increasing. Applied to a total of 109 organisms, including 16 Archaea, 87 Bacteria, and 6 Eukarya, it yields an unrooted tree that agrees with the biologists’ “tree of life” based on SSU rRNA comparison in a majority of basic branchings, and especially, in all lower taxa.",
            "publicationTitle": "Journal of Molecular Evolution",
            "publisher": "",
            "place": "",
            "date": "January 01, 2004",
            "volume": "58",
            "issue": "1",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "1-11",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1007/s00239-003-2493-7",
            "citationKey": "",
            "url": "http://dx.doi.org/10.1007/s00239-003-2493-7",
            "accessDate": "2009-07-06T06:55:06Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "Whole Proteome Prokaryote Phylogeny Without Sequence Alignment",
            "language": "",
            "libraryCatalog": "SpringerLink",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "7RT5TT34",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/7RT5TT34",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/7RT5TT34",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Pinheiro et al.",
            "parsedDate": "2005-03-01",
            "numChildren": 0
        },
        "data": {
            "key": "7RT5TT34",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Comparison of genomic sequences using the Hamming distance",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Hildete Prisco",
                    "lastName": "Pinheiro"
                },
                {
                    "creatorType": "author",
                    "firstName": "Aluísio",
                    "lastName": "de Souza Pinheiro"
                },
                {
                    "creatorType": "author",
                    "firstName": "Pranab Kumar",
                    "lastName": "Sen"
                }
            ],
            "abstractNote": "The paper considers the problem of homogeneity among groups by comparison of genomic sequences. Some alternative procedures that attach less emphasis on the likelihood approach, and more on alternative measures that deal with similar homogeneity problems are considered here. On this approach, a one-sided hypothesis test is considered and the classical ANOVA decomposition can be directly adapted to sample measures based on the Hamming distance, without necessarily going through their second moments. Some results of U-statistics theory will be useful for the decomposition of the test statistic and to find its asymptotic distribution. An application of this test with real data is shown and the p-value of the test statistic is found via bootstrap resampling.",
            "publicationTitle": "Journal of Statistical Planning and Inference",
            "publisher": "",
            "place": "",
            "date": "March 1, 2005",
            "volume": "130",
            "issue": "1-2",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "325-339",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1016/j.jspi.2003.03.002",
            "citationKey": "",
            "url": "http://www.sciencedirect.com/science/article/B6V0M-4D01JYF-1/2/6b8b488a1f627413e6fa49959d67e564",
            "accessDate": "2010-05-12T09:09:31Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "0378-3758",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "ScienceDirect",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "Amino acid",
                    "type": 1
                },
                {
                    "tag": "Asymptotic distribution",
                    "type": 1
                },
                {
                    "tag": "Bootstrap",
                    "type": 1
                },
                {
                    "tag": "Categorical data",
                    "type": 1
                },
                {
                    "tag": "Genome",
                    "type": 1
                },
                {
                    "tag": "Hamming distance",
                    "type": 1
                },
                {
                    "tag": "Nonparametric",
                    "type": 1
                },
                {
                    "tag": "Nucleotide",
                    "type": 1
                },
                {
                    "tag": "Statistical genetics",
                    "type": 1
                },
                {
                    "tag": "U-statistics",
                    "type": 1
                }
            ],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "E85TARUA",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/E85TARUA",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/E85TARUA",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Mrázek",
            "parsedDate": "2009-05-01",
            "numChildren": 0
        },
        "data": {
            "key": "E85TARUA",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Phylogenetic Signals in DNA Composition: Limitations and Prospects",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Jan",
                    "lastName": "Mrázek"
                }
            ],
            "abstractNote": "The concept of genome signature allows sequence comparisons without alignment. It relies on the premise that oligonucleotide compositions of DNA segments from the same or closely related genomes tend to be more similar than those from distantly related genomes. This concept has been used in detection of lateral gene transfer, phylogenetic classification of metagenome sequences (binning), and in studies of evolution of viruses and plasmids. The goal of this work is to explore limitations of genome signature in phylogenetic classification of DNA sequences and to identify formal representations of genome signature that expose best the phylogenetic relationships among prokaryotes. We found that genome signatures that best represent phylogenetic relationships are those normalized to factor out differences in G + C content and utilizing the standard A-C-G-T alphabet or the degenerate R-Y (purine–pyrimidine) alphabet. The main limitation of all genome signature representations tested is lack of divergence among some distantly related species. “Crowding” of the genome signature space and absence of molecular clock likely contribute to this phenomenon. We introduce “periodicity signatures”—formal representations of periodic sequence patterns related to DNA curvature—which can discriminate between bacterial and archaeal DNA sequences. Interestingly, archaea of the order Halobacteriaceae have periodic signatures similar to bacteria, possibly due to their early divergence from other archaea, extensive lateral gene transfer, or due to their adaptation to high salt environments. Our results have practical implications for development and application of genome signature–based methods for analysis and classification of DNA sequences.",
            "publicationTitle": "Molecular Biology and Evolution",
            "publisher": "",
            "place": "",
            "date": "May 01, 2009",
            "volume": "26",
            "issue": "5",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "1163-1169",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1093/molbev/msp032",
            "citationKey": "",
            "url": "http://mbe.oxfordjournals.org/content/26/5/1163.abstract",
            "accessDate": "2011-05-09T01:14:08Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "Phylogenetic Signals in DNA Composition",
            "language": "",
            "libraryCatalog": "Highwire 2.0",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "EBKH7VKJ",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/EBKH7VKJ",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/EBKH7VKJ",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Marçais and Kingsford",
            "parsedDate": "2011-03-15",
            "numChildren": 0
        },
        "data": {
            "key": "EBKH7VKJ",
            "version": 1,
            "itemType": "journalArticle",
            "title": "A fast, lock-free approach for efficient parallel counting of occurrences of k-mers",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Guillaume",
                    "lastName": "Marçais"
                },
                {
                    "creatorType": "author",
                    "firstName": "Carl",
                    "lastName": "Kingsford"
                }
            ],
            "abstractNote": "Motivation: Counting the number of occurrences of every k-mer (substring of length k) in a long string is a central subproblem in many applications, including genome assembly, error correction of sequencing reads, fast multiple sequence alignment and repeat detection. Recently, the deep sequence coverage generated by next-generation sequencing technologies has caused the amount of sequence to be processed during a genome project to grow rapidly, and has rendered current k-mer counting tools too slow and memory intensive. At the same time, large multicore computers have become commonplace in research facilities allowing for a new parallel computational paradigm. Results: We propose a new k-mer counting algorithm and associated implementation, called Jellyfish, which is fast and memory efficient. It is based on a multithreaded, lock-free hash table optimized for counting k-mers up to 31 bases in length. Due to their flexibility, suffix arrays have been the data structure of choice for solving many string problems. For the task of k-mer counting, important in many biological applications, Jellyfish offers a much faster and more memory-efficient solution. Availability: The Jellyfish software is written in C++ and is GPL licensed. It is available for download at http://www.cbcb.umd.edu/software/jellyfish. Contact: gmarcais@umd.edu Supplementary information: Supplementary data are available at Bioinformatics online.",
            "publicationTitle": "Bioinformatics",
            "publisher": "",
            "place": "",
            "date": "March 15, 2011",
            "volume": "27",
            "issue": "6",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "764-770",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1093/bioinformatics/btr011",
            "citationKey": "",
            "url": "http://bioinformatics.oxfordjournals.org/content/27/6/764.abstract",
            "accessDate": "2011-06-09T01:00:33Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "Highwire 2.0",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:34Z",
            "dateModified": "2011-07-27T01:16:34Z"
        }
    },
    {
        "key": "PWHPKJ8M",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/PWHPKJ8M",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/PWHPKJ8M",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Kurtz et al.",
            "parsedDate": "2008",
            "numChildren": 0
        },
        "data": {
            "key": "PWHPKJ8M",
            "version": 1,
            "itemType": "journalArticle",
            "title": "A new method to compute K-mer frequencies and its application to annotate large repetitive plant genomes",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "S.",
                    "lastName": "Kurtz"
                },
                {
                    "creatorType": "author",
                    "firstName": "A.",
                    "lastName": "Narechania"
                },
                {
                    "creatorType": "author",
                    "firstName": "J. C",
                    "lastName": "Stein"
                },
                {
                    "creatorType": "author",
                    "firstName": "D.",
                    "lastName": "Ware"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "BMC genomics",
            "publisher": "",
            "place": "",
            "date": "2008",
            "volume": "9",
            "issue": "1",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "517",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "",
            "citationKey": "",
            "url": "",
            "accessDate": "",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "Google Scholar",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:24Z",
            "dateModified": "2011-07-27T01:16:24Z"
        }
    },
    {
        "key": "TG4QZA2R",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/TG4QZA2R",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/TG4QZA2R",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Karlin and Brendel",
            "parsedDate": "1992-07-03",
            "numChildren": 0
        },
        "data": {
            "key": "TG4QZA2R",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Chance and statistical significance in protein and DNA sequence analysis",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "S",
                    "lastName": "Karlin"
                },
                {
                    "creatorType": "author",
                    "firstName": "V",
                    "lastName": "Brendel"
                }
            ],
            "abstractNote": "Statistical approaches help in the determination of significant configurations in protein and nucleic acid sequence data. Three recent statistical methods are discussed: (i) score-based sequence analysis that provides a means for characterizing anomalies in local sequence text and for evaluating sequence comparisons; (ii) quantile distributions of amino acid usage that reveal general compositional biases in proteins and evolutionary relations; and (iii) r-scan statistics that can be applied to the analysis of spacings of sequence markers.",
            "publicationTitle": "Science",
            "publisher": "",
            "place": "",
            "date": "July 03 , 1992",
            "volume": "257",
            "issue": "5066",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "39 -49",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1126/science.1621093",
            "citationKey": "",
            "url": "http://www.sciencemag.org/content/257/5066/39.abstract",
            "accessDate": "2011-05-16T05:02:26Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "Highwire 2.0",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:24Z",
            "dateModified": "2011-07-27T01:16:24Z"
        }
    },
    {
        "key": "VMG5ACHQ",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/VMG5ACHQ",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/VMG5ACHQ",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Kariin and Burge",
            "parsedDate": "1995-07",
            "numChildren": 0
        },
        "data": {
            "key": "VMG5ACHQ",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Dinucleotide relative abundance extremes: a genomic signature",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Samuel",
                    "lastName": "Kariin"
                },
                {
                    "creatorType": "author",
                    "firstName": "Chris",
                    "lastName": "Burge"
                }
            ],
            "abstractNote": "Early biochemical experiments established that the set of dinucleotide odds ratios or [`]general design' is a remarkably stable property of the DNA of an organism, which is essentially the same in protein-coding DNA, bulk genomic DNA, and in different renaturation rale and density gradient fractions of genomic DNA in many organisms. Analysis of currently available genomic sequence data has extended these earlier results, showing that the general designs of disjoint samples of a genome are substantially more similar to each other than to those of sequences from other organisms and that closely related organisms have similar general designs. From this perspective, the set of dinucleotide odds ratio (relative abundance) values constitute a signature of each DNA genome, which can discriminate between sequences from different organisms. Dinucleotide-odds ratio values appear to reflect not only the chemistry of dinucleotide stacking energies and base-step conformational preferences, but also the species-specific properties of DNA modification, replication and repair mechanisms.",
            "publicationTitle": "Trends in Genetics",
            "publisher": "",
            "place": "",
            "date": "July 1995",
            "volume": "11",
            "issue": "7",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "283-290",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1016/S0168-9525(00)89076-9",
            "citationKey": "",
            "url": "http://www.sciencedirect.com/science/article/B6TCY-40YSS8J-45/2/bddfc62163ecabb5bd5a9b12367dadc1",
            "accessDate": "2010-02-26T04:51:31Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "0168-9525",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "Dinucleotide relative abundance extremes",
            "language": "",
            "libraryCatalog": "ScienceDirect",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:24Z",
            "dateModified": "2011-07-27T01:16:24Z"
        }
    },
    {
        "key": "ETHQNQ53",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/ETHQNQ53",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/ETHQNQ53",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Lu et al.",
            "parsedDate": "1998",
            "numChildren": 0
        },
        "data": {
            "key": "ETHQNQ53",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Characterizing self-similarity in bacteria DNA sequences",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Xin",
                    "lastName": "Lu"
                },
                {
                    "creatorType": "author",
                    "firstName": "Zhirong",
                    "lastName": "Sun"
                },
                {
                    "creatorType": "author",
                    "firstName": "Huimin",
                    "lastName": "Chen"
                },
                {
                    "creatorType": "author",
                    "firstName": "Yanda",
                    "lastName": "Li"
                }
            ],
            "abstractNote": "In this paper some parametric methods are introduced to characterize the self-similarity of DNA sequences. Compared with Fourier analysis, these methods perform statistically more stably and yield more reliable results. Using these methods, eight whole genomes of bacteria provided by NCBI are analyzed. Long-range correlation properties in the nucleotide density distribution along these DNA sequences are explored. Estimation results show that the long-range correlation structure prevails through the entire molecule of DNA. Higher order statistics through coarse graining reveal that rather than multifractal, there are only monofractal phenomena presented in the sequences. Hence, the nucleotide density distribution can be modeled asymptotically as fractional Gaussian noise. This result points to a new direction for analyzing and understanding the intrinsic structures of DNA sequences.",
            "publicationTitle": "Physical Review E",
            "publisher": "",
            "place": "",
            "date": "1998",
            "volume": "58",
            "issue": "3",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "3578",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Phys. Rev. E",
            "DOI": "10.1103/PhysRevE.58.3578",
            "citationKey": "",
            "url": "http://link.aps.org/doi/10.1103/PhysRevE.58.3578",
            "accessDate": "2011-03-09T02:06:29Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "APS",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:24Z",
            "dateModified": "2011-07-27T01:16:24Z"
        }
    },
    {
        "key": "8FKBTSHT",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/8FKBTSHT",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/8FKBTSHT",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Lu et al.",
            "parsedDate": "2008",
            "numChildren": 0
        },
        "data": {
            "key": "8FKBTSHT",
            "version": 1,
            "itemType": "journalArticle",
            "title": "An improved string composition method for sequence comparison",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Guoqing",
                    "lastName": "Lu"
                },
                {
                    "creatorType": "author",
                    "firstName": "Shunpu",
                    "lastName": "Zhang"
                },
                {
                    "creatorType": "author",
                    "firstName": "Xiang",
                    "lastName": "Fang"
                }
            ],
            "abstractNote": "BACKGROUND:Historically, two categories of computational algorithms (alignment-based and alignment-free) have been applied to sequence comparison-one of the most fundamental issues in bioinformatics. Multiple sequence alignment, although dominantly used by biologists, possesses both fundamental as well as computational limitations. Consequently, alignment-free methods have been explored as important alternatives in estimating sequence similarity. Of the alignment-free methods, the string composition vector (CV) methods, which use the frequencies of nucleotide or amino acid strings to represent sequence information, show promising results in genome sequence comparison of prokaryotes. The existing CV-based methods, however, suffer certain statistical problems, thereby underestimating the amount of evolutionary information in genetic sequences.RESULTS:We show that the existing string composition based methods have two problems, one related to the Markov model assumption and the other associated with the denominator of the frequency normalization equation. We propose an improved complete composition vector method under the assumption of a uniform and independent model to estimate sequence information contributing to selection for sequence comparison. Phylogenetic analyses using both simulated and experimental data sets demonstrate that our new method is more robust compared with existing counterparts and comparable in robustness with alignment-based methods.CONCLUSION:We observed two problems existing in the currently used string composition methods and proposed a new robust method for the estimation of evolutionary information of genetic sequences. In addition, we discussed that it might not be necessary to use relatively long strings to build a complete composition vector (CCV), due to the overlapping nature of vector strings with a variable length. 
We suggested a practical approach for the choice of an optimal string length to construct the CCV.",
            "publicationTitle": "BMC Bioinformatics",
            "publisher": "",
            "place": "",
            "date": "2008",
            "volume": "9",
            "issue": "Suppl 6",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "S15",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1186/1471-2105-9-S6-S15",
            "citationKey": "",
            "url": "http://www.biomedcentral.com/1471-2105/9/S6/S15",
            "accessDate": "2009-07-06T06:46:20Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "1471-2105",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "BioMed Central and More",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "k-mer frequencies"
                }
            ],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:24Z",
            "dateModified": "2011-07-27T01:16:24Z"
        }
    },
    {
        "key": "TEKIWS8T",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/TEKIWS8T",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/TEKIWS8T",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Liu and Wang",
            "parsedDate": "2010-10",
            "numChildren": 0
        },
        "data": {
            "key": "TEKIWS8T",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Vector representations and related matrices of DNA primary sequence based on L-tuple",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Ying-zhao",
                    "lastName": "Liu"
                },
                {
                    "creatorType": "author",
                    "firstName": "Tian-ming",
                    "lastName": "Wang"
                }
            ],
            "abstractNote": "We consider to construct 4L-components vectors for a DNA primary sequence based on the L-tuple. For two DNA sequences, using the corresponding vectors, we construct a set of L × L matrices called related matrix. The mathematical characterization from the constructed matrices have been selected to characterize the degree of similarity between the two DNA sequences. The search for similar sequences of a query sequence from a database of 39 library sequences and the construction of phylogenetic tree of H5N1 avian influenza virus illustrate the utility of the matrices for DNA sequences.",
            "publicationTitle": "Mathematical Biosciences",
            "publisher": "",
            "place": "",
            "date": "October 2010",
            "volume": "227",
            "issue": "2",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "147-152",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1016/j.mbs.2010.07.004",
            "citationKey": "",
            "url": "http://www.sciencedirect.com/science/article/B6VHX-50P9H6F-1/2/5c4be07578025859af8788e3138b3f69",
            "accessDate": "2011-03-12T13:46:46Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "0025-5564",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "ScienceDirect",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [
                {
                    "tag": "H5N1 avian influenza virus",
                    "type": 1
                },
                {
                    "tag": "L-tuple",
                    "type": 1
                },
                {
                    "tag": "Numerical characterizations",
                    "type": 1
                },
                {
                    "tag": "Related matrix",
                    "type": 1
                },
                {
                    "tag": "Vector representation",
                    "type": 1
                }
            ],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:24Z",
            "dateModified": "2011-07-27T01:16:24Z"
        }
    },
    {
        "key": "H75XDIEJ",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/H75XDIEJ",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/H75XDIEJ",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Kong et al.",
            "parsedDate": "2009-06-09",
            "numChildren": 0
        },
        "data": {
            "key": "H75XDIEJ",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Quantitative measure of randomness and order for complete genomes",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Sing-Guan",
                    "lastName": "Kong"
                },
                {
                    "creatorType": "author",
                    "firstName": "Wen-Lang",
                    "lastName": "Fan"
                },
                {
                    "creatorType": "author",
                    "firstName": "Hong-Da",
                    "lastName": "Chen"
                },
                {
                    "creatorType": "author",
                    "firstName": "Jan",
                    "lastName": "Wigger"
                },
                {
                    "creatorType": "author",
                    "firstName": "Andrew E.",
                    "lastName": "Torda"
                },
                {
                    "creatorType": "author",
                    "firstName": "H. C.",
                    "lastName": "Lee"
                }
            ],
            "abstractNote": "We propose an order index, ϕ, which gives a quantitative measure of randomness and order of complete genomic sequences. It maps genomes to a number from 0 (random and of infinite length) to 1 (fully ordered) and applies regardless of sequence length. The 786 complete genomic sequences in GenBank were found to have ϕ values in a very narrow range, ϕg=0.031−0.015+0.028. We show this implies that genomes are halfway toward being completely random, or, at the “edge of chaos.” We further show that artificial “genomes” converted from literary classics have ϕ’s that almost exactly coincide with ϕg, but sequences of low information content do not. We infer that ϕg represents a high information-capacity “fixed point” in sequence space, and that genomes are driven to it by the dynamics of a robust growth and evolution process. We show that a growth process characterized by random segmental duplication can robustly drive genomes to the fixed point.",
            "publicationTitle": "Physical Review E",
            "publisher": "",
            "place": "",
            "date": "June 09, 2009",
            "volume": "79",
            "issue": "6",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "061911",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Phys. Rev. E",
            "DOI": "10.1103/PhysRevE.79.061911",
            "citationKey": "",
            "url": "http://link.aps.org/doi/10.1103/PhysRevE.79.061911",
            "accessDate": "2011-03-09T02:12:09Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "APS",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:24Z",
            "dateModified": "2011-07-27T01:16:24Z"
        }
    },
    {
        "key": "3IDTD939",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/3IDTD939",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/3IDTD939",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Karlin",
            "parsedDate": "2005",
            "numChildren": 0
        },
        "data": {
            "key": "3IDTD939",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Statistical signals in bioinformatics",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Samuel",
                    "lastName": "Karlin"
                }
            ],
            "abstractNote": "The Arthur M. Sackler Colloquium of the National Academy of Sciences, “Frontiers in Bioinformatics: Unsolved Problems and Challenges,” organized by David Eisenberg, Russ Altman, and myself, was held October 15-17, 2004, to provide a forum for discussing concepts and methods in bioinformatics serving the biological and medical sciences. The deluge of genomic and proteomic data in the last two decades has driven the creation of tools that search and analyze biomolecular sequences and structures. Bioinformatics is highly interdisciplinary, using knowledge from mathematics, statistics, computer science, biology, medicine, physics, chemistry, and engineering.",
            "publicationTitle": "Proceedings of the National Academy of Sciences of the United States of America",
            "publisher": "",
            "place": "",
            "date": "2005",
            "volume": "102",
            "issue": "38",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "13355-13362",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.1073/pnas.0501804102",
            "citationKey": "",
            "url": "http://www.pnas.org/content/102/38/13355.abstract",
            "accessDate": "2011-05-09T01:24:58Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "Highwire 2.0",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:24Z",
            "dateModified": "2011-07-27T01:16:24Z"
        }
    },
    {
        "key": "3QS4MCHS",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/3QS4MCHS",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/3QS4MCHS",
                "type": "text/html"
            },
            "up": {
                "href": "https://api.zotero.org/groups/50486/items/G2HQJM9B",
                "type": "application/json"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            }
        },
        "data": {
            "key": "3QS4MCHS",
            "version": 1,
            "parentItem": "G2HQJM9B",
            "itemType": "attachment",
            "linkMode": "linked_url",
            "title": "PubMed Central Link",
            "accessDate": "2011-05-09T00:58:08Z",
            "url": "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2784323/",
            "note": "",
            "contentType": "text/html",
            "charset": "",
            "tags": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:13Z",
            "dateModified": "2011-07-27T01:16:13Z"
        }
    },
    {
        "key": "AP2TEX78",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/AP2TEX78",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/AP2TEX78",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Curnow and Kirkwood",
            "parsedDate": "1989-01-01",
            "numChildren": 0
        },
        "data": {
            "key": "AP2TEX78",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Statistical Analysis of Deoxyribonucleic Acid Sequence Data--A Review",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "R. N.",
                    "lastName": "Curnow"
                },
                {
                    "creatorType": "author",
                    "firstName": "T. B. L.",
                    "lastName": "Kirkwood"
                }
            ],
            "abstractNote": "A review is made of new statistical problems presented by the recent rapid growth in availability of deoxyribonucleic acid (DNA) sequence information. These problems include the analysis of DNA sequence composition, the detection of sequence homologies, the identification of sequence control elements and the design of genetic probes.",
            "publicationTitle": "Journal of the Royal Statistical Society. Series A (Statistics in Society)",
            "publisher": "",
            "place": "",
            "date": "January 01, 1989",
            "volume": "152",
            "issue": "2",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "199-220",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.2307/2982915",
            "citationKey": "",
            "url": "http://www.jstor.org/stable/2982915",
            "accessDate": "2011-03-12T13:25:20Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "09641998",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "JSTOR",
            "callNumber": "",
            "rights": "",
            "extra": "ArticleType: research-article / Full publication date: 1989 / Copyright © 1989 Royal Statistical Society",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:13Z",
            "dateModified": "2011-07-27T01:16:13Z"
        }
    },
    {
        "key": "P3SGBNHN",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/P3SGBNHN",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/P3SGBNHN",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Church and Helfman",
            "parsedDate": "1993-06-01",
            "numChildren": 0
        },
        "data": {
            "key": "P3SGBNHN",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Dotplot: A Program for Exploring Self-Similarity in Millions of Lines of Text and Code",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Kenneth Ward",
                    "lastName": "Church"
                },
                {
                    "creatorType": "author",
                    "firstName": "Jonathan Isaac",
                    "lastName": "Helfman"
                }
            ],
            "abstractNote": "An interactive program, dotplot, has been developed for browsing millions of lines of text and source code, using an approach borrowed from biology for studying homology (self-similarity) in DNA sequences. With conventional browsing tools such as a screen editor, it is difficult to identify structures that are too big to fit on the screen. In contrast, with dotplots we find that many of these structures show up as diagonals, squares, textures, and other visually recognizable features, as will be illustrated in examples selected from biology and two new application domains, text (AP news, Canadian Hansards) and source code (5ESS®). In an attempt to isolate the mechanisms that produce these features, we have synthesized similar features in dotplots of artificial sequences. We also introduce an approximation that makes the calculation of dotplots practical for use in an interactive browser.",
            "publicationTitle": "Journal of Computational and Graphical Statistics",
            "publisher": "",
            "place": "",
            "date": "June 01, 1993",
            "volume": "2",
            "issue": "2",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "153-174",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "10.2307/1390697",
            "citationKey": "",
            "url": "http://www.jstor.org/stable/1390697",
            "accessDate": "2011-03-09T02:07:59Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "10618600",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "Dotplot",
            "language": "",
            "libraryCatalog": "JSTOR",
            "callNumber": "",
            "rights": "",
            "extra": "ArticleType: research-article / Full publication date: Jun., 1993 / Copyright © 1993 American Statistical Association, Institute of Mathematical Statistics and Interface Foundation of America",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:13Z",
            "dateModified": "2011-07-27T01:16:13Z"
        }
    },
    {
        "key": "5Z9X3G25",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/5Z9X3G25",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/5Z9X3G25",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Apostolico and Denas",
            "parsedDate": "2008",
            "numChildren": 0
        },
        "data": {
            "key": "5Z9X3G25",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Fast algorithms for computing sequence distances by exhaustive substring composition",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "A.",
                    "lastName": "Apostolico"
                },
                {
                    "creatorType": "author",
                    "firstName": "O.",
                    "lastName": "Denas"
                }
            ],
            "abstractNote": "",
            "publicationTitle": "Algorithms for Molecular Biology",
            "publisher": "",
            "place": "",
            "date": "2008",
            "volume": "3",
            "issue": "1",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "13",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "",
            "DOI": "",
            "citationKey": "",
            "url": "",
            "accessDate": "",
            "PMID": "",
            "PMCID": "",
            "ISSN": "",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "Google Scholar",
            "callNumber": "",
            "rights": "",
            "extra": "",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:13Z",
            "dateModified": "2011-07-27T01:16:13Z"
        }
    },
    {
        "key": "J78P9M23",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/J78P9M23",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/J78P9M23",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Gesine Reinert et al.",
            "parsedDate": "2004-07-05",
            "numChildren": 0
        },
        "data": {
            "key": "J78P9M23",
            "version": 1,
            "itemType": "webpage",
            "title": "Probabilistic and Statistical Properties of Words: An Overview",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "",
                    "lastName": "Gesine Reinert"
                },
                {
                    "creatorType": "author",
                    "firstName": "",
                    "lastName": "Sophie Schbath"
                },
                {
                    "creatorType": "author",
                    "firstName": "",
                    "lastName": "Michael S. Waterman"
                }
            ],
            "abstractNote": "",
            "websiteTitle": "",
            "websiteType": "research-article",
            "date": "2004-07-05",
            "publisher": "",
            "place": "",
            "DOI": "",
            "citationKey": "",
            "url": "http://www.liebertonline.com/doi/abs/10.1089/10665270050081360",
            "accessDate": "2009-07-06T07:08:21Z",
            "shortTitle": "Probabilistic and Statistical Properties of Words",
            "language": "",
            "rights": "",
            "extra": "In the following, an overview is given on statistical and probabilistic properties of words, as occurring in the analysis of biological sequences. Counts of occurrence, counts of clumps, and renewal counts are distinguished, and exact distributions as well as normal approximations, Poisson process approximations, and compound Poisson approximations are derived. Here, a sequence is modelled as a stationary ergodic Markov chain; a test for determining the appropriate order of the Markov chain is described. The convergence results take the error made by estimating the Markovian transition probabilities into account. The main tools involved are moment generating functions, martingales, Stein’s method, and the Chen-Stein method. Similar results are given for occurrences of multiple patterns, and, as an example, the problem of unique recoverability of a sequence from SBH chip data is discussed. Special emphasis lies on disentangling the complicated dependence structure between word occurrences, due to self-over...",
            "tags": [
                {
                    "tag": "COMPOUND POISSON APPROXIMATION",
                    "type": 1
                },
                {
                    "tag": "EXACT DISTRIBUTION",
                    "type": 1
                },
                {
                    "tag": "MARKOV MODEL",
                    "type": 1
                },
                {
                    "tag": "MARTINGALES",
                    "type": 1
                },
                {
                    "tag": "POISSON PROCESS APPROXIMATION",
                    "type": 1
                },
                {
                    "tag": "RENEWAL COUNTS",
                    "type": 1
                },
                {
                    "tag": "SEQUENCING BY HYBRIDIZATION",
                    "type": 1
                },
                {
                    "tag": "STEIN'S METHOD",
                    "type": 1
                },
                {
                    "tag": "WORD COUNTS",
                    "type": 1
                }
            ],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:13Z",
            "dateModified": "2011-07-27T01:16:13Z"
        }
    },
    {
        "key": "FIMM3CBR",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/FIMM3CBR",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/FIMM3CBR",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Csurös et al.",
            "parsedDate": "2007-11",
            "numChildren": 0
        },
        "data": {
            "key": "FIMM3CBR",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Reconsidering the significance of genomic word frequencies",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Miklós",
                    "lastName": "Csurös"
                },
                {
                    "creatorType": "author",
                    "firstName": "Laurent",
                    "lastName": "Noé"
                },
                {
                    "creatorType": "author",
                    "firstName": "Gregory",
                    "lastName": "Kucherov"
                }
            ],
            "abstractNote": "By conventional wisdom, a feature that occurs too often or too rarely in a genome can indicate a functional element. To infer functionality from frequency, it is crucial to precisely characterize occurrences in randomly evolving DNA. We find that the frequency of oligonucleotides in a genomic sequence follows primarily a Pareto-lognormal distribution, which encapsulates lognormal and power-law features found across all known genomes. Such a distribution could be the result of completely random evolution by a copying process. Our characterization of the entire frequency distribution of genomic words opens a way to a more accurate reasoning about their over- and underrepresentation in genomic sequences.",
            "publicationTitle": "Trends in Genetics: TIG",
            "publisher": "",
            "place": "",
            "date": "Nov 2007",
            "volume": "23",
            "issue": "11",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "543-546",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Trends Genet",
            "DOI": "10.1016/j.tig.2007.07.008",
            "citationKey": "",
            "url": "http://www.ncbi.nlm.nih.gov/pubmed/17964682",
            "accessDate": "2011-05-09T01:10:29Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "0168-9525",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "NCBI PubMed",
            "callNumber": "",
            "rights": "",
            "extra": "PMID: 17964682",
            "tags": [
                {
                    "tag": "Animals",
                    "type": 1
                },
                {
                    "tag": "Evolution, Molecular",
                    "type": 1
                },
                {
                    "tag": "Gene Duplication",
                    "type": 1
                },
                {
                    "tag": "Genome",
                    "type": 1
                },
                {
                    "tag": "Genomics",
                    "type": 1
                },
                {
                    "tag": "Humans",
                    "type": 1
                },
                {
                    "tag": "Markov Chains",
                    "type": 1
                },
                {
                    "tag": "Oligonucleotides",
                    "type": 1
                }
            ],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:13Z",
            "dateModified": "2011-07-27T01:16:13Z"
        }
    },
    {
        "key": "G2HQJM9B",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/G2HQJM9B",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/G2HQJM9B",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Chor et al.",
            "parsedDate": "2009",
            "numChildren": 1
        },
        "data": {
            "key": "G2HQJM9B",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Genomic DNA k-mer spectra: models and modalities",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Benny",
                    "lastName": "Chor"
                },
                {
                    "creatorType": "author",
                    "firstName": "David",
                    "lastName": "Horn"
                },
                {
                    "creatorType": "author",
                    "firstName": "Nick",
                    "lastName": "Goldman"
                },
                {
                    "creatorType": "author",
                    "firstName": "Yaron",
                    "lastName": "Levy"
                },
                {
                    "creatorType": "author",
                    "firstName": "Tim",
                    "lastName": "Massingham"
                }
            ],
            "abstractNote": "Tetrapods, unlike other organisms, have multimodal spectra of k-mers in their genomes",
            "publicationTitle": "Genome Biology",
            "publisher": "",
            "place": "",
            "date": "2009",
            "volume": "10",
            "issue": "10",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "R108-R108",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Genome Biol",
            "DOI": "10.1186/gb-2009-10-10-r108",
            "citationKey": "",
            "url": "",
            "accessDate": "",
            "PMID": "",
            "PMCID": "",
            "ISSN": "1465-6906",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "Genomic DNA k-mer spectra",
            "language": "",
            "libraryCatalog": "PubMed Central",
            "callNumber": "",
            "rights": "",
            "extra": "PMID: 19814784\nPMCID: 2784323",
            "tags": [],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:13Z",
            "dateModified": "2011-07-27T01:16:13Z"
        }
    },
    {
        "key": "MNDEPWA7",
        "version": 1,
        "library": {
            "type": "group",
            "id": 50486,
            "name": "csiro-k-mers",
            "links": {
                "alternate": {
                    "href": "https://www.zotero.org/groups/csiro-k-mers",
                    "type": "text/html"
                }
            }
        },
        "links": {
            "self": {
                "href": "https://api.zotero.org/groups/50486/items/MNDEPWA7",
                "type": "application/json"
            },
            "alternate": {
                "href": "https://www.zotero.org/groups/csiro-k-mers/items/MNDEPWA7",
                "type": "text/html"
            }
        },
        "meta": {
            "createdByUser": {
                "id": 147989,
                "username": "David.Lovell",
                "name": "David Lovell",
                "links": {
                    "alternate": {
                        "href": "https://www.zotero.org/david.lovell",
                        "type": "text/html"
                    }
                }
            },
            "creatorSummary": "Chen et al.",
            "parsedDate": "2005-07",
            "numChildren": 0
        },
        "data": {
            "key": "MNDEPWA7",
            "version": 1,
            "itemType": "journalArticle",
            "title": "Model for the distributions of k-mers in DNA sequences",
            "creators": [
                {
                    "creatorType": "author",
                    "firstName": "Yaw-Hwang",
                    "lastName": "Chen"
                },
                {
                    "creatorType": "author",
                    "firstName": "Su-Long",
                    "lastName": "Nyeo"
                },
                {
                    "creatorType": "author",
                    "firstName": "Chiung-Yuh",
                    "lastName": "Yeh"
                }
            ],
            "abstractNote": "The evolutionary features based on the distributions of k-mers in the DNA sequences of various organisms are studied. The organisms are classified into three groups based on their evolutionary periods: (a) E. coli and T. pallidum, (b) yeast, zebrafish, A. thaliana, and fruit fly, (c) mouse, chicken, and human. The distributions of 6-mers of these three groups are shown to be, respectively, (a) unimodal, (b) unimodal with peaks generally shifted to smaller frequencies of occurrence, (c) bimodal. To describe the bimodal feature of the k-mer distributions of group (c), a model based on the cytosine-guanine \"CG\" content of the DNA sequences is introduced and shown to provide reasonably good agreements.",
            "publicationTitle": "Physical Review. E, Statistical, Nonlinear, and Soft Matter Physics",
            "publisher": "",
            "place": "",
            "date": "Jul 2005",
            "volume": "72",
            "issue": "1 Pt 1",
            "section": "",
            "partNumber": "",
            "partTitle": "",
            "pages": "011908",
            "series": "",
            "seriesTitle": "",
            "seriesText": "",
            "journalAbbreviation": "Phys Rev E Stat Nonlin Soft Matter Phys",
            "DOI": "",
            "citationKey": "",
            "url": "http://www.ncbi.nlm.nih.gov/pubmed/16090002",
            "accessDate": "2011-05-09T01:05:06Z",
            "PMID": "",
            "PMCID": "",
            "ISSN": "1539-3755",
            "archive": "",
            "archiveLocation": "",
            "shortTitle": "",
            "language": "",
            "libraryCatalog": "NCBI PubMed",
            "callNumber": "",
            "rights": "",
            "extra": "PMID: 16090002",
            "tags": [
                {
                    "tag": "Animals",
                    "type": 1
                },
                {
                    "tag": "Arabidopsis",
                    "type": 1
                },
                {
                    "tag": "Base Sequence",
                    "type": 1
                },
                {
                    "tag": "Chickens",
                    "type": 1
                },
                {
                    "tag": "Chromosomes, Human, Pair 21",
                    "type": 1
                },
                {
                    "tag": "Conserved Sequence",
                    "type": 1
                },
                {
                    "tag": "Cytosine",
                    "type": 1
                },
                {
                    "tag": "DNA",
                    "type": 1
                },
                {
                    "tag": "Dimerization",
                    "type": 1
                },
                {
                    "tag": "Drosophila",
                    "type": 1
                },
                {
                    "tag": "Escherichia coli",
                    "type": 1
                },
                {
                    "tag": "Evolution, Molecular",
                    "type": 1
                },
                {
                    "tag": "Genes, Fungal",
                    "type": 1
                },
                {
                    "tag": "Genes, Plant",
                    "type": 1
                },
                {
                    "tag": "Genome",
                    "type": 1
                },
                {
                    "tag": "Genome, Bacterial",
                    "type": 1
                },
                {
                    "tag": "Guanine",
                    "type": 1
                },
                {
                    "tag": "Humans",
                    "type": 1
                },
                {
                    "tag": "Mice",
                    "type": 1
                },
                {
                    "tag": "Models, Statistical",
                    "type": 1
                },
                {
                    "tag": "Nucleic Acid Conformation",
                    "type": 1
                },
                {
                    "tag": "Sequence Analysis, DNA",
                    "type": 1
                },
                {
                    "tag": "Treponema pallidum",
                    "type": 1
                },
                {
                    "tag": "Zebrafish",
                    "type": 1
                },
                {
                    "tag": "probability",
                    "type": 1
                }
            ],
            "collections": [],
            "relations": {},
            "dateAdded": "2011-07-27T01:16:13Z",
            "dateModified": "2011-07-27T01:16:13Z"
        }
    }
]