TDT4310-project-sorted-japa.../project_report/references.bib

% On/kun reading index of the jouyou kanji table, published online by Japan's
% Agency for Cultural Affairs.
% The author is an institution whose name contains a comma, so it is wrapped in
% a second pair of braces; without them the name is parsed as "Last, First".
@online{jouyou,
  author  = {{Agency for Cultural Affairs, Government of Japan}},
  title   = {常用漢字表の音訓索引},
  url     = {https://www.bunka.go.jp/kokugo_nihongo/sisaku/joho/joho/kijun/naikaku/kanji/joyokanjisakuin/index.html},
  urldate = {2023-04-17},
}

% Jisho Study Tool: GitHub repository published under the handle h7x4.
@online{jst,
  author  = {h7x4},
  title   = {Jisho Study Tool},
  url     = {https://github.com/h7x4/Jisho-Study-Tool},
  urldate = {2023-04-15},
}

% Paper describing the JMdict dictionary project.
% An inproceedings entry requires a booktitle, which was missing.
% NOTE(review): booktitle is taken from the ACL Anthology record for this paper;
% confirm against the published proceedings.
@inproceedings{jmdict,
  author    = {Breen, Jim},
  title     = {{JMdict}: a {Japanese-Multilingual} Dictionary},
  booktitle = {Proceedings of the Workshop on Multilingual Linguistic Resources},
  year      = {2004},
  url       = {https://www.edrdg.org/jmdict/jmdictart.html},
}

% Paper describing the compilation of the Tanaka Corpus.
% An inproceedings entry requires a booktitle, which was missing.
% NOTE(review): the author was previously recorded as "Yuki Tanaka"; the EDRDG
% corpus page credits Yasuhito Tanaka and a PACLING 2001 paper. Confirm both the
% given name and the booktitle against the linked PDF.
@inproceedings{tanaka-corpus,
  author    = {Tanaka, Yasuhito},
  title     = {Compilation of a multilingual parallel corpus},
  booktitle = {Proceedings of {PACLING} 2001},
  year      = {2001},
  url       = {https://www.edrdg.org/projects/tanaka/tanaka.pdf},
}

% Text difficulty classifier for European Portuguese reading materials.
% Changes: added the booktitle that inproceedings requires, used "--" for the
% page range, wrote the month as the standard macro, and protected the proper
% nouns in the title from style recasing.
% NOTE(review): booktitle is inferred from the SciTePress DOI (CSEDU 2015);
% confirm against the DOI landing page. The month value comes from the original
% entry and may be an export artefact.
@inproceedings{portuguese,
  author    = {Curto, Pedro and Mamede, Nuno and Baptista, Jorge},
  title     = {Automatic Text Difficulty Classifier - Assisting the Selection Of Adequate Reading Materials For {European} {Portuguese} Teaching},
  booktitle = {Proceedings of the 7th International Conference on Computer Supported Education ({CSEDU} 2015)},
  year      = {2015},
  month     = jan,
  pages     = {36--44},
  doi       = {10.5220/0005428300360044},
}

% Source file from the "ve" repository on GitHub (Japanese transliteration code).
% The underscore in the title is escaped: title fields are typeset as ordinary
% LaTeX text, where a bare underscore raises a "Missing $ inserted" error.
% The url field is handled verbatim and needs no escaping.
@online{ve,
  author  = {Kim Ahlstrom},
  title   = {japanese\_transliterators.rb},
  url     = {https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb},
  urldate = {2023-04-19},
}

% Section "Prolog and Document Type Declaration" of the W3C XML 1.1 recommendation.
% The author is an organisation, so the name is double-braced to keep it from
% being split into given/family parts (it would otherwise be read as given
% names "World wide web" + family name "consortium"). Capitalisation of the
% proper name is also corrected.
@online{xmldtd,
  author  = {{World Wide Web Consortium}},
  title   = {Prolog and Document Type Declaration},
  url     = {https://www.w3.org/TR/xml11/#sec-prolog-dtd},
  urldate = {2023-04-22},
}

% Surrounding Word Sense Model paper on Japanese all-words word sense disambiguation.
% The venue is a conference proceedings, so the entry is typed as inproceedings
% with the venue in booktitle (it was an article with the proceedings name in
% journal). The page range uses "--" and the language name is brace-protected.
@inproceedings{swsm,
  author    = {Komiya, Kanako and Sasaki, Yuto and Morita, Hajime and Sasaki, Minoru and Shinnou, Hiroyuki and Kotani, Yoshiyuki},
  title     = {Surrounding Word Sense Model for {Japanese} All-words Word Sense Disambiguation},
  booktitle = {Proceedings of the 29th Pacific Asia Conference on Language, Information and Computation},
  year      = {2015},
  pages     = {35--43},
  url       = {https://cir.nii.ac.jp/crid/1050282677488198784},
}

% Jurafsky and Martin, Speech and Language Processing (textbook).
% The edition is given as an integer so biblatex can format and localise the
% ordinal itself; "1st" would be printed literally.
% NOTE(review): the citation key ends in "23" while the fields describe the
% 2000 first edition from Prentice Hall; confirm which edition the report cites.
% NOTE(review): if the bibliography is built with classic BibTeX styles rather
% than biblatex, the edition should be written as "First" instead.
@book{jurafsky-23,
  author    = {Jurafsky, Daniel and Martin, James H.},
  title     = {Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition},
  edition   = {1},
  publisher = {Prentice Hall PTR},
  address   = {USA},
  year      = {2000},
  isbn      = {0130950696},
  abstract  = {From the Publisher:This book takes an empirical approach to language processing, based on applying statistical and other machine-learning algorithms to large corpora. Methodology boxes are included in each chapter. Each chapter is built around one or more worked examples to demonstrate the main idea of the chapter. Covers the fundamental algorithms of various fields, whether originally proposed for spoken or written language to demonstrate how the same algorithm can be used for speech recognition and word-sense disambiguation. Emphasis on web and other practical applications. Emphasis on scientific evaluation. Useful as a reference for professionals in any of the areas of speech and language processing.},
}

% fugashi: a MeCab wrapper for Python, used for Japanese tokenisation (NLP-OSS 2020).
% Case protection now braces whole words rather than a single letter, and also
% covers "Python" and the acronym "NLP" in the booktitle.
% NOTE(review): the doi follows the ACL Anthology pattern for the paper id in
% the url; confirm it resolves.
@inproceedings{mccann-2020-fugashi,
    author    = {McCann, Paul},
    title     = {fugashi, a Tool for Tokenizing {Japanese} in {Python}},
    booktitle = {Proceedings of Second Workshop for {NLP} Open Source Software ({NLP-OSS})},
    month     = nov,
    year      = {2020},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    pages     = {44--51},
    doi       = {10.18653/v1/2020.nlposs-1.7},
    url       = {https://www.aclweb.org/anthology/2020.nlposs-1.7},
    abstract  = {Recent years have seen an increase in the number of large-scale multilingual NLP projects. However, even in such projects, languages with special processing requirements are often excluded. One such language is Japanese. Japanese is written without spaces, tokenization is non-trivial, and while high quality open source tokenizers exist they can be hard to use and lack English documentation. This paper introduces fugashi, a MeCab wrapper for Python, and gives an introduction to tokenizing Japanese.},
}

% "About" page of the Jisho.org website.
@online{jisho,
  author  = {Kim Ahlstrom},
  title   = {Jisho.org},
  url     = {https://jisho.org/about},
  urldate = {2023-04-20},
}