YB
Size: a a a
YB
YB
A1
A
m
R
library(data.table)
library(stringdist)

# Align target Russian translations with their Dutch source paragraphs.
# Every target paragraph is fuzzy-matched against a machine translation of
# the source text; the row of the closest machine translation identifies the
# source paragraph and, with it, the paragraph's position in the source.

# Load a CSV with source Dutch paragraphs (column "nl") and their
# Google-Translated Russian counterparts (column "ru"),
# one paragraph per line.
nl_ru_para_pairs <- data.table::fread("nl_ru_pairs.csv")

# Load a CSV with target Russian translations (column "ru")
# in alphabetical order, one paragraph per line.
ru_paras <- data.table::fread("ru_paras.csv")

# For each target Russian paragraph, find the row of the closest
# Google-Translated paragraph.
# maxDist is set explicitly: amatch() defaults to maxDist = 0.1, and q-gram
# distances are whole numbers, so the default would accept only paragraphs
# with identical 5-gram profiles and leave every other paragraph unmatched.
# NB: experiment with different string-matching methods
# in amatch to obtain feasible results. Beware the memory and time
# consumption of the task. (https://journal.r-project.org/archive/2014-1/loo.pdf)
closest_row <- stringdist::amatch(
  ru_paras$ru,
  nl_ru_para_pairs$ru,
  method = "qgram",
  q = 5,
  maxDist = Inf
)

# Map target Russian translations to source Dutch paragraphs and record the
# order of the targets in the source text.
# The matched row index already is the position in the source, so it is
# stored directly instead of being looked up again via the Dutch text (which
# would collapse repeated Dutch paragraphs onto their first occurrence).
# Paragraphs for which amatch() returns NA get NA in both columns.
# NB: duplicates are still not fully respected: amatch() yields one row per
# paragraph, so targets closest to the same machine translation share a row.
ru_paras[, `:=`(
  nl_inferred = nl_ru_para_pairs$nl[closest_row],
  source_order = closest_row
)]
A
R
library(data.table)
library(stringdist)

# Align target Russian translations with their Dutch source paragraphs.
# Every target paragraph is fuzzy-matched against a machine translation of
# the source text; the row of the closest machine translation identifies the
# source paragraph and, with it, the paragraph's position in the source.

# Load a CSV with source Dutch paragraphs (column "nl") and their
# Google-Translated Russian counterparts (column "ru"),
# one paragraph per line.
nl_ru_para_pairs <- data.table::fread("nl_ru_pairs.csv")

# Load a CSV with target Russian translations (column "ru")
# in alphabetical order, one paragraph per line.
ru_paras <- data.table::fread("ru_paras.csv")

# For each target Russian paragraph, find the row of the closest
# Google-Translated paragraph.
# maxDist is set explicitly: amatch() defaults to maxDist = 0.1, and q-gram
# distances are whole numbers, so the default would accept only paragraphs
# with identical 5-gram profiles and leave every other paragraph unmatched.
# NB: experiment with different string-matching methods
# in amatch to obtain feasible results. Beware the memory and time
# consumption of the task. (https://journal.r-project.org/archive/2014-1/loo.pdf)
matched_row <- stringdist::amatch(
  ru_paras$ru,
  nl_ru_para_pairs$ru,
  method = "qgram",
  q = 5,
  maxDist = Inf
)

# Map target Russian translations to source Dutch paragraphs and record the
# order of the targets in the source text.
# The matched row index already is the position in the source, so it is
# stored directly instead of being looked up again via the Dutch text (which
# would collapse repeated Dutch paragraphs onto their first occurrence).
# Paragraphs for which amatch() returns NA get NA in both columns.
# NB: duplicates are still not fully respected: amatch() yields one row per
# paragraph, so targets closest to the same machine translation share a row.
ru_paras[, `:=`(
  nl_inferred = nl_ru_para_pairs$nl[matched_row],
  source_order = matched_row
)]
VI
AM
VI
AM
VI
VI
VI
AM
VI
AY
AY
T
AY