Add data buffering

This commit is contained in:
Oystein Kristoffer Tveit 2020-05-27 17:28:01 +02:00
parent 86e2c54a9f
commit 8701d50e64
4 changed files with 39 additions and 8 deletions

5
.gitignore vendored
View File

@ -1,10 +1,15 @@
#LaTeX output files
main.out
main.log
main.aux
main.synctex.gz
main.toc
#Data to be reprocessed
data/testing/
data/pages/
data/tables/
data/jisho
#Libraries
jishoScrape/node_modules

View File

@ -24,6 +24,7 @@ folders:
mkdir data/pages
mkdir data/tables
mkdir data/testing
mkdir data/jisho
cd jishoScrape
npm install
cd ..

View File

@ -1,7 +1,7 @@
const fs = require('fs');
/* Import local files */
const {fetchKanjiFromTxt, fetchKanjiFromJisho} = require('./src/dataFetching.js');
const {fetchKanjiFromTxt, fetchJishoBufferData, fetchKanjiFromJisho} = require('./src/dataFetching.js');
const {getKanjiTexData} = require('./src/texConversion.js');
const {kanjiTable} = require('./src/kanjiTables.js');
@ -10,12 +10,25 @@ async function main(jlptLevel) {
const jlptLevelCaps = jlptLevel.toUpperCase();
/* Fetch data from buffer if available.
* Else fetch data from txt and jisho requests,
* and make buffer files
*/
if(fs.existsSync(`./data/jisho/${jlptLevel}.json`)) {
var jishoResults = await fetchJishoBufferData(`./data/jisho/${jlptLevel}.json`);
console.log(`${jlptLevelCaps}: Fetched Jisho data from buffer`);
} else {
const kanjiArray = await fetchKanjiFromTxt(`./data/txt/${jlptLevel}.txt`);
console.log(`${jlptLevelCaps}: Fetched txt`);
const jishoResults = await fetchKanjiFromJisho(kanjiArray);
var jishoResults = await fetchKanjiFromJisho(kanjiArray);
console.log(`${jlptLevelCaps}: Fetched Jisho data`);
fs.writeFile(`./data/jisho/${jlptLevel}.json`, JSON.stringify(jishoResults, null, " "), (err) => {if (err) console.error(err)});
console.log(`${jlptLevelCaps}: Written Jisho data to buffer`);
}
const sortedKanjiArray = jishoResults.map(result => result.query);
const texData = getKanjiTexData(jishoResults);
console.log(`${jlptLevelCaps}: Processed pages`);

View File

@ -6,7 +6,7 @@ const jisho = new jishoApi();
/**
* Reads a txt file and splits the characters into an array
* @param {string} file Path to file
* @returns {string[]} Kanji
* @returns {string[]} A list of Kanji
*/
async function fetchKanjiFromTxt(file) {
const read = util.promisify(fs.readFile);
@ -14,10 +14,21 @@ async function fetchKanjiFromTxt(file) {
return data.split('');
}
/**
 * Loads buffered Jisho results from a JSON file on disk.
 * The file is read as UTF-8 text and the text is parsed with JSON.parse.
 * @param {string} file Path to the JSON file
 * @returns {object} The parsed contents of the file (delivered through the async function's promise)
 */
async function fetchJishoBufferData(file) {
  // Read the whole file as text first; parsing happens only after the read has finished.
  const rawJson = await util.promisify(fs.readFile)(file, 'utf8');
  return JSON.parse(rawJson);
}
/**
* Makes a delayed kanji search request in order not to overload the server.
* @param {string} kanji The character to search for
* @param {number} delay Number of milliseconds delay to the request
* @param {string} kanji A character to search for
* @param {number} delay A number of milliseconds delay to the request
* @return {promise} A promise that's going to run a request after the specified delay
*/
async function delayedJishoCall(kanji, delay) {
@ -43,4 +54,5 @@ async function fetchKanjiFromJisho(kanjiArray) {
}
exports.fetchKanjiFromTxt = fetchKanjiFromTxt;
exports.fetchJishoBufferData = fetchJishoBufferData;
exports.fetchKanjiFromJisho = fetchKanjiFromJisho;