Wikisłownik hsbwiktionary https://hsb.wiktionary.org/wiki/H%C5%82owna_strona MediaWiki 1.44.0-wmf.4 case-sensitive Media Specialnje Diskusija Wužiwar Diskusija z wužiwarjom Wikisłownik Diskusija k Wikisłownikej Dataja Diskusija k dataji MediaWiki MediaWiki diskusija Předłoha Diskusija k předłoze Pomoc Pomoc diskusija Kategorija Diskusija ke kategoriji TimedText TimedText talk Modul Modul diskusija Afghaničan 0 3557 17582 17567 2024-11-22T08:54:21Z Sławobóg 2519 /* Přełožki */ 17582 wikitext text/x-wiki ==Hornjoserbšćina== [[Dataja:Defense.gov photo essay 100102-A-6365W-069.jpg|thumb|Afghaničan]] ===Alternatiwne formy=== * {{alt|hsb|Afganičan}} ===Etymologija=== Wot {{suf|hsb|Afghanistan|alt1=Afghani(stan)|-čan}}. ====Wurjekowanje==== * {{hsb-IPA|Af-ga'ničan}} ====Wěcownik==== {{hsb-wěc|m-wos|ž=Afghaničanka}} # [[wobydler]] [[Afghanistan|Afghanistanje]] =====Skłonjowanje===== {{hsb-skłon-wěc|m.wos.locu}} ====Přiwuzne wopřijeća==== {{kol-auto|hsb|title=kajkostniki|afghanski}} =====Přełožki===== {{přełožki-hora|wobydler Afghanistanje}} * arabski: {{p+|ar|أَفْغَانِيّ|m}} * běłoruski: {{p|be|афга́нец|m}} * bołharski: {{p+|bg|афганиста́нец|m}} * chinski: *: mandarinski: {{p+|cmn|阿富汗人|tr=Āfùhànrén}} * čěski: {{p+|cs|Afghánec|m}} * delnjoserbski: {{p|dsb|Afghanaŕ|m}} * finski: {{p+|fi|afganistanilainen}} * francoski: {{p+|fr|Afghan|m}} * grjekski: {{p+|el|Αφγανός|m}} * jendźelski: {{p+|en|Afghan}} * kašubski: {{p|csb|Afgańczik|m}} * makedonski: {{p|mk|Авганистанец|m}} * němski: {{p+|de|Afghane|m}} * portugalski: {{p+|pt|afegão|m}} * pólski: {{p+|pl|Afgańczyk|m}} * ruski: {{p+|ru|афга́нец|m}} * słowakski: {{p+|sk|Afganec|m}} * słowjenski: {{p+|sl|Afgánistanec|m}} * španiski: {{p|es|afgano|m}} * turkmenski: {{p|tk|owgan}} * turkowski: {{p+|tr|Afgan}} * ujgurski: {{p|ug|ئافغان}} * uzbekski: {{p|uz|afgʻon}} * ukrainski: {{p+|uk|афга́нець|m}} {{přełožki-spódk}} fqc7pf8fovo3obkozn1xuueja7ndefr Modul:hsb-noun 828 5846 17583 17394 2024-11-22T09:02:16Z Sławobóg 2519 17583 Scribunto 
text/plain
local export = {}

--[=[

Authorship: Zhnka

]=]

--[=[

TERMINOLOGY:

-- "slot" = A particular combination of case/number.
	 Example slot names for nouns are "gen_s" (genitive singular) and
	 "voc_p" (vocative plural). Each slot is filled with zero or more forms.

-- "form" = The declined form representing the value of a given slot.

-- "lemma" = The dictionary form. Generally the nominative
	 masculine singular, but may occasionally be another form if the nominative
	 masculine singular is missing.
]=]

local lang = require("Modul:languages").getByCode("hsb")
local m_table = require("Modul:table")
local m_links = require("Modul:links")
local m_string_utilities = require("Modul:string utilities")
local iut = require("Modul:inflection utilities")
local m_para = require("Modul:parameters")
local com = require("Modul:hsb-common")

local current_title = mw.title.getCurrentTitle()
local NAMESPACE = current_title.nsText
local PAGENAME = current_title.text

local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower

local force_cat = false -- set to true to make categories appear in non-mainspace pages, for testing


-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end


-- version of rsubn() that returns a 2nd argument boolean indicating whether
-- a substitution was made.
local function rsubb(term, foo, bar)
	local retval, nsubs = rsubn(term, foo, bar)
	return retval, nsubs > 0
end


-- Record a tracking hit under "hsb-noun/<track_id>". Always returns true so that it can be used inside a
-- boolean expression.
local function track(track_id)
	require("Modul:debug/track")("hsb-noun/" .. track_id)
	return true
end


-- Map from slot name to the accelerator form ("case|number") of that slot.
local output_noun_slots = {
	nom_s = "nom|s",
	gen_s = "gen|s",
	dat_s = "dat|s",
	acc_s = "acc|s",
	voc_s = "voc|s",
	loc_s = "loc|s",
	ins_s = "ins|s",
	nom_d = "nom|d",
	gen_d = "gen|d",
	dat_d = "dat|d",
	acc_d = "acc|d",
	voc_d = "voc|d",
	loc_d = "loc|d",
	ins_d = "ins|d",
	nom_p = "nom|p",
	gen_p = "gen|p",
	dat_p = "dat|p",
	acc_p = "acc|p",
	voc_p = "voc|p",
	loc_p = "loc|p",
	ins_p = "ins|p",
}


-- Return the slot -> accelerator-form table appropriate for `alternant_multiword_spec`. If the noun does not have
-- all three numbers, the number suffix ("|s", "|d" or "|p") is stripped from every accelerator form.
--
-- The stripped variant is built as a fresh table rather than by rewriting `output_noun_slots` in place, so that a
-- later call in the same Lua invocation for a noun with all three numbers still sees the unmodified table. The dual
-- suffix is stripped along with the singular and plural ones.
local function get_output_noun_slots(alternant_multiword_spec)
	if alternant_multiword_spec.actual_number == "allthree" then
		return output_noun_slots
	end
	local stripped_slots = {}
	for slot, accel_form in pairs(output_noun_slots) do
		-- Parenthesize to discard the substitution count returned by gsub().
		stripped_slots[slot] = (accel_form:gsub("|[sdp]$", ""))
	end
	return stripped_slots
end


local potential_lemma_slots = {"nom_s", "nom_p", "gen_s"}


local cases = {
	nom = true,
	gen = true,
	dat = true,
	acc = true,
	voc = true,
	loc = true,
	ins = true,
}


local clitic_cases = {
	gen = true,
	dat = true,
	acc = true,
}


-- Wrapper around com.dereduce() that throws an error if the stem cannot be dereduced.
local function dereduce(base, stem)
	local dereduced_stem = com.dereduce(base, stem)
	if not dereduced_stem then
		error("Unable to dereduce stem '" .. stem .. "'")
	end
	return dereduced_stem
end


-- Return a truthy value if `slot` should not be filled for a noun restricted to `number`: plural slots are skipped
-- for "jed" (singular-only) nouns and singular slots for "mn" (plural-only) nouns.
-- NOTE(review): dual ("_d") slots and the "dw" number are never skipped here; confirm whether singular-only and
-- plural-only nouns are really meant to get dual forms.
local function skip_slot(number, slot)
	return number == "jed" and rfind(slot, "_p$") or
		number == "mn" and rfind(slot, "_s$")
end


-- Basic function to combine stem(s) and ending(s) and insert the result into the appropriate slot. `stems` is either
-- the `stems` object passed into the declension functions (containing the various stems; see below) or a string to
-- override the stem. (NOTE: If you pass a string in as `stems`, you should pass the value of `stems.footnotes` as the
-- value of `footnotes` as it will be lost otherwise. If you need to supply your own footnote in addition, use
-- iut.combine_footnotes() to combine any user-specified footnote(s) with your footnote(s).)
-- `endings` is either a
-- string specifying a single ending or a list of endings. If `endings` is nil, no forms are inserted. If an ending is
-- "-", the value of `stems` is ignored and the lemma is used instead as the stem; this is important in case the user
-- used `decllemma:` to specify a declension lemma different from the actual lemma, or specified '.foreign' (which has
-- a similar effect).
local function add(base, slot, stems, endings, footnotes)
	if not endings then
		return
	end
	-- Call skip_slot() based on the declined number; if the actual number is different, we correct this in
	-- decline_noun() at the end.
	if skip_slot(base.number, slot) then
		return
	end
	local stems_footnotes = type(stems) == "table" and stems.footnotes or nil
	footnotes = iut.combine_footnotes(iut.combine_footnotes(base.footnotes, stems_footnotes), footnotes)
	if type(endings) == "string" then
		endings = {endings}
	end
	-- Depends only on `base` and `slot`, so create it once rather than once per ending.
	local function combine_stem_ending(stem, ending)
		return com.combine_stem_ending(base, slot, stem, ending)
	end
	for _, ending in ipairs(endings) do
		-- Compute the stem. If ending is "-", use the lemma regardless. Otherwise if `stems` is a string, use it.
		-- Otherwise `stems` is an object containing four stems (vowel-vs-non-vowel cross regular-vs-oblique);
		-- compute the appropriate stem based on the slot and whether the ending begins with a vowel.
		local stem
		if ending == "-" then
			stem = base.actual_lemma
			ending = ""
		elseif type(stems) == "string" then
			stem = stems
		else
			local is_vowel_ending = rfind(ending, "^" .. com.vowel_c)
			if stems.oblique_slots == "all" then
				if is_vowel_ending then
					stem = stems.oblique_vowel_stem
				else
					stem = stems.oblique_nonvowel_stem
				end
			elseif is_vowel_ending then
				stem = stems.vowel_stem
			else
				stem = stems.nonvowel_stem
			end
		end
		ending = iut.combine_form_and_footnotes(ending, footnotes)
		iut.add_forms(base.forms, slot, stem, ending, combine_stem_ending)
	end
end


-- Apply the user-specified slot overrides in `base.overrides` for every slot for which `do_slot(slot)` is truthy.
-- An overridden slot has its existing forms discarded and is marked in `base.slot_overridden`. Throws an error if an
-- override is given for a slot excluded by the noun's number restriction.
local function process_slot_overrides(base, do_slot)
	for slot, overrides in pairs(base.overrides) do
		-- Call skip_slot() based on the declined number; if the actual number is different, we correct this in
		-- decline_noun() at the end.
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number ..
				"' number restriction")
		end
		if do_slot(slot) then
			base.slot_overridden[slot] = true
			base.forms[slot] = nil
			for _, override in ipairs(overrides) do
				for _, value in ipairs(override.values) do
					local form = value.form
					local combined_notes = iut.combine_footnotes(base.footnotes, value.footnotes)
					if override.full then
						-- Full override: the value is the complete form; an empty value leaves the slot empty.
						if form ~= "" then
							iut.insert_form(base.forms, slot, {form = form, footnotes = combined_notes})
						end
					else
						-- Convert a null ending to "-" in the acc/voc sg slots so that e.g. [[Kerberos]] declared as
						-- <m.sg.foreign.gena:u.acc-:a> works correctly and generates accusative 'Kerberos/Kerbera' not
						-- #'Kerber/Kerbera'.
						if (slot == "acc_s" or slot == "voc_s") and form == "" then
							form = "-"
						end
						for _, stems in ipairs(base.stem_sets) do
							add(base, slot, stems, form, combined_notes)
						end
					end
				end
			end
		end
	end
end


-- Add the endings for a whole declension. Each ending argument is a string, a list of strings or nil (slot left
-- alone), as accepted by add(). The nominative singular is always added from the lemma (ending "-"); `nom_s` can
-- supply additional nominative singular endings. `footnotes` is attached to every form added.
local function add_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
	nom_d, gen_d, dat_d,
	nom_p, gen_p, dat_p, acc_p, loc_p, ins_p,
	nom_s, footnotes
)
	add(base, "nom_s", stems, "-", footnotes)
	add(base, "gen_s", stems, gen_s, footnotes)
	add(base, "dat_s", stems, dat_s, footnotes)
	add(base, "acc_s", stems, acc_s, footnotes)
	add(base, "voc_s", stems, voc_s, footnotes)
	add(base, "loc_s", stems, loc_s, footnotes)
	add(base, "ins_s", stems, ins_s, footnotes)
	add(base, "nom_d", stems, nom_d, footnotes)
	add(base, "gen_d", stems, gen_d, footnotes)
	add(base, "dat_d", stems, dat_d, footnotes)
	if base.number == "mn" then
		-- If this is a plurale tantum noun and we're processing the nominative plural, use the user-specified lemma
		-- rather than generating the plural from the synthesized singular, which may not match the specified lemma
		-- (e.g. [[tvargle]] "Olomouc cheese" using <m.pl.mixed> would try to generate 'tvargle/tvargly', and [[peníze]]
		-- "money" using <m.pl.#ě.genpl-> would try to generate 'peněze').
		-- NOTE(review): when a caller passes nil for both nom_p and acc_p (as the supplementary add_decl() calls in
		-- several declensions do), deepEquals(nil, nil) presumably holds and acc_p becomes the lemma; confirm that
		-- this is intended for plural-only nouns whose accusative should be derived from the genitive.
		local acc_p_like_nom = m_table.deepEquals(nom_p, acc_p)
		nom_p = "-"
		if acc_p_like_nom then
			acc_p = "-"
		end
	end
	add(base, "nom_p", stems, nom_p, footnotes)
	add(base, "gen_p", stems, gen_p, footnotes)
	add(base, "dat_p", stems, dat_p, footnotes)
	add(base, "acc_p", stems, acc_p, footnotes)
	add(base, "loc_p", stems, loc_p, footnotes)
	add(base, "ins_p", stems, ins_p, footnotes)
	add(base, "nom_s", stems, nom_s, footnotes)
end

-- Add singular endings only (no dual or plural endings).
local function add_sg_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, footnotes
)
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nil, nil, nil,
		nil, nil, nil, nil, nil, nil,
		-- Explicit nil for `nom_s`, so that `footnotes` reaches the `footnotes` parameter of add_decl() instead of
		-- being taken as an extra nominative singular ending.
		nil, footnotes)
end

-- Add dual endings only; the nominative dual is taken from the lemma.
local function add_du_only_decl(base, stems,
	gen_d, dat_d, footnotes
)
	add_decl(base, stems,
		nil, nil, nil, nil, nil, nil,
		"-", gen_d, dat_d,
		nil, nil, nil, nil, nil, nil,
		-- Explicit nil for `nom_s`; see add_sg_decl().
		nil, footnotes)
end

-- Add plural endings only; the nominative plural is taken from the lemma.
local function add_pl_only_decl(base, stems,
	gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	add_decl(base, stems,
		nil, nil, nil, nil, nil, nil,
		nil, nil, nil,
		"-", gen_p, dat_p, acc_p, loc_p, ins_p,
		-- Explicit nil for `nom_s`; see add_sg_decl().
		nil, footnotes)
end


-- Process slot overrides and fill in the slots that are derived from other slots (vocative plural, the
-- accusatives, the clitic accusative singular and the linked versions of the potential lemma slots).
local function handle_derived_slots_and_overrides(base)
	local function is_non_derived_slot(slot)
		return slot ~= "voc_p" and slot ~= "acc_s" and slot ~= "clitic_acc_s"
	end

	local function is_derived_slot(slot)
		return not is_non_derived_slot(slot)
	end

	base.slot_overridden = {}
	-- Handle overrides for the non-derived slots. Do this before generating the derived
	-- slots so overrides of the source slots (e.g. nom_p) propagate to the derived slots.
	process_slot_overrides(base, is_non_derived_slot)

	-- Generate the remaining slots that are derived from other slots.
	if not base.pron and not base.det then
		-- Pronouns don't have a vocative (singular or plural).
		iut.insert_forms(base.forms, "voc_p", base.forms.nom_p)
	end

	-- Fill accusative slot `acc_slot`, unless it already has forms or was overridden, by copying the slot that
	-- `source_slot_by_animacy` names for the noun's animacy. Nothing is copied for any other animacy value.
	local function derive_accusative(acc_slot, source_slot_by_animacy)
		if base.forms[acc_slot] or base.slot_overridden[acc_slot] then
			return
		end
		local source_slot = source_slot_by_animacy[base.animacy]
		iut.insert_forms(base.forms, acc_slot, source_slot and base.forms[source_slot])
	end

	derive_accusative("acc_s", {["wěc"] = "nom_s", wos = "gen_s", zwj = "gen_s"})
	derive_accusative("acc_d", {["wěc"] = "nom_d", wos = "gen_d", zwj = "nom_d"})
	derive_accusative("acc_p", {["wěc"] = "nom_p", wos = "gen_p", zwj = "nom_p"})

	if not base.forms.clitic_acc_s and not base.slot_overridden.clitic_acc_s then
		iut.insert_forms(base.forms, "clitic_acc_s", base.forms[base.animacy == "wěc" and "nom_s" or "clitic_gen_s"])
	end

	-- Handle overrides for derived slots, to allow them to be overridden.
	process_slot_overrides(base, is_derived_slot)

	-- Compute linked versions of potential lemma slots, for use in {{hsb-noun}}.
	-- We substitute the original lemma (before removing links) for forms that
	-- are the same as the lemma, if the original lemma has links.
	for _, slot in ipairs(potential_lemma_slots) do
		iut.insert_forms(base.forms, slot .. "_linked", iut.map_forms(base.forms[slot], function(form)
			if form == base.orig_lemma_no_links and rfind(base.orig_lemma, "%[%[") then
				return base.orig_lemma
			else
				return form
			end
		end))
	end
end


-- Table mapping declension types to functions to decline the noun. The function takes two arguments, `base` and
-- `stems`; the latter specifies the computed stems (vowel vs. non-vowel, singular vs. plural) and whether the noun
-- is reducible and/or has vowel alternations in the stem.
Most of the specifics of determining which stem to use -- and how to modify it for the given ending are handled in add_decl(); the declension functions just need to generate -- the appropriate endings. local decls = {} -- Table specifying additional properties for declension types. Every declension type must have such a table, which -- specifies which category or categories to add and what annotation to show in the title bar of the declension table. -- -- * Only the `cat` property of this table is mandatory; there is also a `desc` property to specify the annotation, but -- this can be omitted and the annotation will then be computed from the `cat` property. The `cat` property is either -- a string, a list of strings or a function (of two arguments, `base` and `stems` as above) returning a string or -- list of strings. The string can contain the keywords GENDER to substitute the gender (and animacy for masculine -- nouns) and POS (to substitute the pluralized part of speech). The keyword GENPOS is equivalent to 'GENDER POS'. If -- no keyword is present, ' GENPOS' is added onto the end. If only GENDER is present, ' POS' is added onto the end. -- In all cases, the language name is added onto the beginning to form the full category name. -- * The `desc` property is of the same form as the `cat` property and specifies the annotation to display in the title -- bar (which may have the same format as the category minus the part of speech, or may be abbreviated). The value -- may not be a list of strings, as only one annotation is displayed. If omitted, it is derived from the category -- spec(s) by taking the last category (if more than one is given) and removing ' POS' before keyword substitution. 
local declprops = {}


-- Build a `declprops` entry whose `desc` and `cat` properties are functions returning the given constants.
local function fixed_declprops(desc, cat)
	return {
		desc = function(base, stems)
			return desc
		end,
		cat = function(base, stems)
			return cat
		end,
	}
end


-- Return the oblique vowel stem of `stems` after com.apply_palatalization() and
-- com.convert_paired_plain_to_palatal(), as a string stem for use with add_decl().
local function palatalized_stem(stems)
	return com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem))
end


-- Genitive singular ending(s) shared by the masculine noun declensions.
-- NOTE(review): the animacy values accepted by the indicator parser are "wos", "zwj" and "wěc", so the comparison
-- with "in" looks like it can never succeed and the {"a", "u"} alternative is dead. Left unchanged because enabling
-- it would add a genitive in -u to inanimate masculine nouns; confirm the intended value.
local function masc_gen_s(base)
	return base.animacy == "in" and {"a", "u"} or "a"
end


decls["hard-m"] = function(base, stems)
	local nom_p = base.animacy == "wos" and "ojo" or "y"
	-- No vocative in -o for lemmas ending in -tr; the palatalized vocative in -e below still applies.
	local voc_s = not rmatch(base.lemma, ".*tr$") and "o"
	-- acc_s is nil here and in the other masculine declensions: it is derived later from nom_s/gen_s.
	add_decl(base, stems, masc_gen_s(base), "ej", nil, voc_s, "u", "om", "aj", "ow", "omaj", nom_p, "ow", "am", nil,
		"ach", "ami")
	-- Additional voc_s and loc_s in -e on the palatalized stem.
	add_decl(base, palatalized_stem(stems), nil, nil, nil, "e", "e")
end

declprops["hard-m"] = fixed_declprops("muski twjerdy zdónk", "muski twjerdy zdónk")


decls["soft-m"] = function(base, stems)
	local nom_p = base.animacy == "wos" and "ojo" or "e"
	add_decl(base, com.addj(stems.oblique_vowel_stem), masc_gen_s(base), "ej", nil, "o", "u", "om", "ej", "ow",
		"omaj", nom_p, "ow", "am", nil, "ach", "emi")
end

declprops["soft-m"] = fixed_declprops("muski mjechki zdónk", "muski mjechki zdónk")


decls["czs-m"] = function(base, stems)
	local nom_p = base.animacy == "wos" and "ojo" or "y"
	add_decl(base, stems, masc_gen_s(base), "ej", nil, "o", "u", "om", "aj", "ow", "omaj", nom_p, "ow", "am", nil,
		"ach", "ami")
end

declprops["czs-m"] = fixed_declprops("muski twjerdy syčacy zdónk", "muski twjerdy zdónk")


decls["velar-m"] = function(base, stems)
	local nom_p = base.animacy == "wos" and "ojo" or "i"
	add_decl(base, stems, masc_gen_s(base), "ej", nil, "o", "u", "om", "aj", "ow", "omaj", nom_p, "ow", "am", nil,
		"ach", "ami")
	-- Additional loc_s in -e on the palatalized stem.
	add_decl(base, palatalized_stem(stems), nil, nil, nil, nil, "e")
end

declprops["velar-m"] = fixed_declprops("masculine velar stem", "masculine velar stem")


decls["adje-m"] = function(base, stems)
	-- Add only the nominative plural: -`wos_ending` on the palatalized stem for "wos" nouns, otherwise -e on the
	-- plain stem.
	local function add_nom_p(wos_ending)
		if base.animacy == "wos" then
			add_decl(base, com.apply_palatalization(stems.oblique_vowel_stem), nil, nil, nil, nil, nil, nil, nil, nil,
				nil, wos_ending)
		else
			add_decl(base, stems, nil, nil, nil, nil, nil, nil, nil, nil, nil, "e")
		end
	end

	if rmatch(base.lemma, "^.*" .. com.velar_c .. "i$") then
		-- nom_p is nil in the main call and supplied by add_nom_p().
		add_decl(base, stems, "eho", "emu", nil, "-", "im", "im", "aj", "eju", "imaj", nil, "ich", "im", nil, "ich",
			"imi")
		add_nom_p("y")
	elseif rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "i$") then
		local nom_p = base.animacy == "wos" and "i" or "e"
		add_decl(base, stems, "eho", "emu", nil, "-", "im", "im", "ej", "eju", "imaj", nom_p, "ich", "im", nil, "ich",
			"imi")
	elseif rmatch(base.lemma, "^.*[czs]e$") then
		local nom_p = base.animacy == "wos" and "y" or "e"
		add_decl(base, stems, "eho", "emu", nil, "-", "ym", "ym", "aj", "eju", "ymaj", nom_p, "ych", "ym", nil, "ych",
			"ymi")
	else
		-- nom_p is nil in the main call and supplied by add_nom_p().
		add_decl(base, stems, "eho", "emu", nil, "-", "ym", "ym", "aj", "eju", "ymaj", nil, "ych", "ym", nil, "ych",
			"ymi")
		add_nom_p("i")
	end
end

declprops["adje-m"] = fixed_declprops("", "masculine adjectival")


decls["hard-f"] = function(base, stems)
	add_decl(base, stems, "y", nil, "u", "-", nil, "u", nil, "ow", "omaj", "y", "ow", "am", "y", "ach", "ami")
	-- dat_s, loc_s and nom_d in -e on the palatalized stem.
	add_decl(base, palatalized_stem(stems), nil, "e", nil, nil, "e", nil, "e")
end

declprops["hard-f"] = fixed_declprops("žónski twjerdy zdónk", "žónski twjerdy zdónk")


decls["soft-f"] = function(base, stems)
	if rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$") then
		-- Lemma ends in a consonant: the accusative singular equals the lemma.
		add_decl(base, com.addj(stems.oblique_vowel_stem), "e", nil, "-", "-", nil, "u", nil, "ow", "omaj", "e", "ow",
			"am", "e", "ach", "emi")
	else
		add_decl(base, stems, "e", nil, "u", "-", nil, "u", nil, "ow", "omaj", "e", "ow", "am", "e", "ach", "emi")
	end
	-- dat_s, loc_s, nom_d and an additional gen_p in -i.
	add_decl(base, com.removej(com.addj(stems.oblique_vowel_stem)), nil, "i", nil, nil, "i", nil, "i", nil, nil, nil,
		"i")
end

declprops["soft-f"] = fixed_declprops("žónski mjechki zdónk", "žónski mjechki zdónk")


decls["czs-f"] = function(base, stems)
	if rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$") then
		-- Lemma ends in a consonant: the accusative singular equals the lemma.
		add_decl(base, stems, "y", "y", "-", "-", "y", "u", "y", "ow", "omaj", "y", "ow", "am", "y", "ach", "ami")
	else
		add_decl(base, stems, "y", "y", "u", "-", "y", "u", "y", "ow", "omaj", "y", "ow", "am", "y", "ach", "ami")
	end
end

declprops["czs-f"] = fixed_declprops("žónski twjerdy syčacy zdónk", "žónski twjerdy zdónk")


decls["velar-f"] = function(base, stems)
	add_decl(base, stems, "i", nil, "u", "-", nil, "u", nil, "ow", "omaj", "i", "ow", "am", "i", "ach", "ami")
	-- dat_s, loc_s and nom_d in -e on the palatalized stem.
	add_decl(base, palatalized_stem(stems), nil, "e", nil, nil, "e", nil, "e")
end

declprops["velar-f"] = fixed_declprops("feminine velar stem", "feminine velar stem")


decls["v-f"] = function(base, stems)
	add_decl(base, stems, "wje", "wi", "-", "-", "wi", "wju", "wi", "wjow", "wjomaj", "wje", "wjow", "wjam", "wje",
		"wjach", "wjemi")
end

declprops["v-f"] = fixed_declprops("feminine v-stem", "feminine v-stem")


decls["adje-f"] = function(base, stems)
	if rmatch(base.lemma, "^.*" .. com.velar_c .. "a$") or rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "a$") then
		add_decl(base, stems, "eje", "ej", "u", "-", "ej", "ej", "ej", "eju", "imaj", "e", "ich", "im", "e", "ich",
			"imi")
	else
		add_decl(base, stems, "eje", "ej", "u", "-", "ej", "ej", "ej", "eju", "ymaj", "e", "ych", "ym", "e", "ych",
			"ymi")
	end
end

declprops["adje-f"] = fixed_declprops("", "feminine adjectival")


decls["hard-n"] = function(base, stems)
	add_decl(base, stems, "a", "u", "-", "-", nil, "om", nil, "ow", "omaj", "a", "ow", "am", "a", "ach", "ami")
	-- loc_s and nom_d in -e on the palatalized stem.
	add_decl(base, palatalized_stem(stems), nil, nil, nil, nil, "e", nil, "e")
end

declprops["hard-n"] = fixed_declprops("ničeji twjerdy zdónk", "ničeji twjerdy zdónk")


decls["soft-n"] = function(base, stems)
	add_decl(base, stems, "a", "u", "-", "-", "u", "om", nil, "ow", "omaj", "a", "ow", "am", "a", "ach", "emi")
	-- nom_d in -i.
	add_decl(base, com.removej(stems.oblique_vowel_stem), nil, nil, nil, nil, nil, nil, "i")
end

declprops["soft-n"] = fixed_declprops("ničeji mjechki zdónk", "ničeji mjechki zdónk")


decls["czs-n"] = function(base, stems)
	add_decl(base, stems, "a", "u", "-", "-", "u", "om", "y", "ow", "omaj", "a", "ow", "am", "a", "ach", "ami")
end

declprops["czs-n"] = fixed_declprops("ničeji twjerdy syčacy zdónk", "ničeji twjerdy zdónk")


decls["velar-n"] = function(base, stems)
	add_decl(base, stems, "a", "u", "-", "-", "u", "om", nil, "ow", "omaj", "a", "ow", "am", "a", "ach", "ami")
	-- Additional loc_s, and nom_d, in -e on the palatalized stem.
	add_decl(base, palatalized_stem(stems), nil, nil, nil, nil, "e", nil, "e")
end

declprops["velar-n"] = fixed_declprops("neuter velar stem", "neuter velar stem")


decls["adje-n"] = function(base, stems)
	if rmatch(base.lemma, "^.*" .. com.velar_c .. "e$") or rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "e$") then
		add_decl(base, stems, "eho", "emu", "-", "-", "im", "im", "ej", "eju", "imaj", "e", "ich", "im", "e", "ich",
			"imi")
	else
		-- Same gen/dat/acc singular endings as the branch above (-eho, -emu, accusative = lemma). This branch
		-- previously used the endings of the feminine adjectival declension ("eje", "ej", "u") for these slots.
		add_decl(base, stems, "eho", "emu", "-", "-", "ym", "ym", "ej", "eju", "ymaj", "e", "ych", "ym", "e", "ych",
			"ymi")
	end
end

declprops["adje-n"] = fixed_declprops("", "neuter adjectival")


decls["tstem-n"] = function(base, stems)
	add_decl(base, stems, "eća", "eću", "-", "-", "eću", "ećom", "eći", "ećow", "ećomaj", "ata", "atow", "atam",
		"ata", "atach", "atami")
end

declprops["tstem-n"] = fixed_declprops("neuter t-stem", "neuter t-stem")


decls["nstem-n"] = function(base, stems)
	add_decl(base, stems, "enja", "enju", "-", "-", "enju", "enjom", "eni", "enjow", "enjomaj", "enja", "enjow",
		"enjam", "enja", "enjach", "enjami")
end

declprops["nstem-n"] = fixed_declprops("neuter n-stem", "neuter n-stem")


-- Adjectival nouns: generate the adjective forms with the adjective module and copy the forms matching the noun's
-- gender and number into the noun slots.
decls["adj"] = function(base, stems)
	local props = {}
	local propspec = table.concat(props, ".")
	if propspec ~= "" then
		propspec = "<" .. propspec .. ">"
	end
	-- NOTE(review): "zlw-ocs" is the language code of Old Czech; confirm that this is the adjective module intended
	-- for Upper Sorbian.
	local adj_alternant_multiword_spec = require("Modul:zlw-ocs-adjective").do_generate_forms({base.lemma .. propspec})
	local function copy(from_slot, to_slot)
		base.forms[to_slot] = adj_alternant_multiword_spec.forms[from_slot]
	end
	if base.number ~= "mn" then
		if base.gender == "m" then
			copy("nom_m", "nom_s")
			copy("gen_mn", "gen_s")
			copy("dat_mn", "dat_s")
			copy("loc_mn", "loc_s")
			copy("ins_mn", "ins_s")
		elseif base.gender == "ž" then
			copy("nom_f", "nom_s")
			copy("gen_f", "gen_s")
			copy("dat_f", "dat_s")
			copy("acc_f", "acc_s")
			copy("loc_f", "loc_s")
			copy("ins_f", "ins_s")
		else
			copy("nom_n", "nom_s")
			copy("gen_mn", "gen_s")
			copy("dat_mn", "dat_s")
			copy("acc_n", "acc_s")
			copy("loc_mn", "loc_s")
			copy("ins_mn", "ins_s")
		end
		if not base.forms.voc_s then
			iut.insert_forms(base.forms, "voc_s", base.forms.nom_s)
		end
	end
	if base.number ~= "jed" then
		if base.gender == "m" then
			copy("nom_mp", "nom_p")
			copy("acc_mfp", "acc_p")
			copy("nom_md", "nom_d")
		elseif base.gender == "ž" then
			copy("nom_fp", "nom_p")
			copy("acc_mfp", "acc_p")
			copy("nom_fnd", "nom_d")
		else
			copy("nom_np", "nom_p")
			copy("acc_np", "acc_p")
			copy("nom_fnd", "nom_d")
		end
		copy("gen_p", "gen_p")
		copy("dat_p", "dat_p")
		copy("ins_p", "ins_p")
		copy("loc_p", "loc_p")
		copy("gen_d", "gen_d")
		copy("dat_d", "dat_d")
	end
end

-- Classify an adjectival lemma as "hard", "soft" or "possessive" from its final letter.
-- NOTE(review): the endings tested are 'ý' and 'í'; the adjectival lemma patterns elsewhere in this module end in
-- plain 'i', 'a' or 'e', so presumably every lemma falls through to "possessive" -- confirm.
local function get_stemtype(base)
	if rfind(base.lemma, "ý$") then
		return "hard"
	elseif rfind(base.lemma, "í$") then
		return "soft"
	else
		return "possessive"
	end
end

declprops["adj"] = {
	cat = function(base, stems)
		return {"adjectival POS", get_stemtype(base) .. " GENDER adjectival POS"}
	end,
}


decls["indecl"] = function(base, stems)
	-- Indeclinable. Note that fully indeclinable nouns should not have a table at all rather than one all of whose forms
	-- are the same; but having an indeclinable declension is useful for nouns that may or may not be indeclinable, e.g.
	-- [[desatero]] "group of ten" or the plural of [[peso]], which may be indeclinable 'pesos'.
	--
	-- add_decl() takes 15 endings (six singular, three dual, six plural); all of them must be "-" so that the
	-- locative and instrumental plural are filled in as well.
	add_decl(base, stems,
		"-", "-", "-", "-", "-", "-",
		"-", "-", "-",
		"-", "-", "-", "-", "-", "-")
end

declprops["indecl"] = {
	cat = function(base, stems)
		if base.adj then
			return {"adjectival POS", "indeclinable adjectival POS", "indeclinable GENDER adjectival POS"}
		else
			return {"indeclinable POS", "indeclinable GENPOS"}
		end
	end
}


decls["manual"] = function(base, stems)
	-- Anything declined manually using overrides. We don't set any declensions except the nom_s (or nom_p if plurale
	-- tantum).
	add(base, base.number == "mn" and "nom_p" or "nom_s", stems, "-")
end

declprops["manual"] = {
	desc = "GENDER",
	cat = {},
}


-- Set gender, number, animacy and clitic defaults on `base` for the recognized pronouns ('štó' and 'što'). Throws
-- an error if the user specified any of these properties or if the pronoun is not recognized.
local function set_pron_defaults(base)
	if base.gender or base.lemma ~= "ona" and base.number or base.animacy then
		error("Can't specify gender, number or animacy for pronouns")
	end

	local function pron_props()
		-- Return values are GENDER, NUMBER, ANIMACY, HAS_CLITIC.
		if base.lemma == "štó" then
			return "none", "jed", "wos", false
		elseif base.lemma == "što" then
			return "none", "jed", "wěc", false
		else
			error(("Unrecognized pronoun '%s'"):format(base.lemma))
		end
	end

	local gender, number, animacy, has_clitic = pron_props()
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end


-- Set the (empty) stems and the declension type for a pronoun.
local function determine_pronoun_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with pronouns")
	end
	base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
	base.decl = "pron"
end


decls["pron"] = function(base, stems)
	-- The stems are empty, so the "endings" are the complete forms; the accusative is derived later.
	if base.lemma == "štó" then
		add_decl(base, stems, "koho", "komu", nil, nil, "kim", "kim")
	elseif base.lemma == "što" then
		add_decl(base, stems, "čeho", "čemu", nil, nil, "čim", "čim")
	else
		error(("Internal error: Unrecognized pronoun lemma '%s'"):format(base.lemma))
	end
end

declprops["pron"] = {
	desc = "GENDER pronoun",
	cat = {},
}


-- Set gender, number, animacy and clitic defaults on `base` for a numeral. Throws an error if the user specified
-- gender or animacy (a user-specified number is not checked, although the message mentions it).
local function set_num_defaults(base)
	if base.gender or base.animacy then
		error("Can't specify gender, number or animacy for numeral")
	end

	local function num_props()
		-- Return values are GENDER, NUMBER, ANIMACY, HAS_CLITIC.
		return "none", "mn", "none", false
	end

	local gender, number, animacy, has_clitic = num_props()
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end


-- Set gender, number, animacy and clitic defaults on `base` for a determiner. Throws an error if the user
-- specified any of gender, number or animacy.
local function set_det_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for determiner")
	end

	local function det_props()
		-- Return values are GENDER, NUMBER, ANIMACY, HAS_CLITIC.
		return "none", "none", "none", false
	end

	local gender, number, animacy, has_clitic = det_props()
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end


-- Set the stem (the lemma minus a final vowel, if any) and the declension type for a determiner.
local function determine_determiner_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with determiners")
	end
	local stem = rmatch(base.lemma, "^(.*)" .. com.vowel_c .. "$") or base.lemma
	base.stem_sets = {{reducible = false, vowel_stem = stem, nonvowel_stem = stem}}
	base.decl = "det"
end


decls["det"] = function(base, stems)
	add_sg_decl(base, stems, "a", "a", "-", nil, "a", "a")
end

declprops["det"] = {
	desc = "GENDER determiner",
	cat = {},
}


-- Collect the footnotes from `separated_group`, a list alternating between plain text (odd indices) and bracketed
-- footnotes (even indices). Returns a list of footnotes, or nil if there are none. Throws an error if non-empty
-- text follows a footnote.
local function fetch_footnotes(separated_group)
	local footnotes
	for j = 2, #separated_group - 1, 2 do
		if separated_group[j + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		if not footnotes then
			footnotes = {}
		end
		table.insert(footnotes, separated_group[j])
	end
	return footnotes
end


-- Parse a slot override such as 'gen:u', 'nompl+accpl-:e' or 'insdu...'. `segments` is the alternating
-- text/footnote run for the override. Returns two values: the list of slots being overridden and an object
-- {full = BOOLEAN_OR_NIL, values = {{form = "FORM", footnotes = {...} or nil}, ...}}, where `full` is set when the
-- slot spec is followed by an extra ':' (the values are then full forms rather than endings).
local function parse_override(segments)
	local retval = {values = {}}
	local part = segments[1]
	local slots = {}
	while true do
		-- Each slot spec is a three-letter case, optionally followed by 'pl' or 'du'; several may be joined by '+'.
		local case = usub(part, 1, 3)
		if cases[case] then
			-- ok
		else
			error(("Unrecognized case '%s' in override: '%s'"):format(case, table.concat(segments)))
		end
		part = usub(part, 4)
		local slot
		if rfind(part, "^pl") then
			part = usub(part, 3)
			slot = case .. "_p"
		elseif rfind(part, "^du") then
			part = usub(part, 3)
			slot = case .. "_d"
		else
			slot = case .. "_s"
		end
		table.insert(slots, slot)
		if rfind(part, "^%+") then
			part = usub(part, 2)
		else
			break
		end
	end
	if rfind(part, "^:") then
		retval.full = true
		part = usub(part, 2)
	end
	segments[1] = part
	local colon_separated_groups = iut.split_alternating_runs_and_strip_spaces(segments, ":")
	for i, colon_separated_group in ipairs(colon_separated_groups) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			error(("Use - to indicate an empty ending for slot%s '%s': '%s'"):format(#slots > 1 and "s" or "", table.concat(slots), table.concat(segments)))
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes(colon_separated_group)
		table.insert(retval.values, value)
	end
	return slots, retval
end


--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more
dot-separated indicators within them). Return value is an object of the form

{
  overrides = {
	SLOT = {OVERRIDE, OVERRIDE, ...}, -- as returned by parse_override()
	...
  },
  forms = {}, -- forms for a single spec alternant; see `forms` below
  footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
  stem_sets = { -- may be missing
	{
	  reducible = TRUE_OR_FALSE,
	  footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
	  -- The following fields are filled in by determine_stems()
	  vowel_stem = "STEM",
	  nonvowel_stem = "STEM",
	  oblique_slots = "all",
	  oblique_vowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
	  oblique_nonvowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
	},
	...
  },
  gender = "GENDER", -- "m", "ž", "n"
  number = "NUMBER", -- "jed", "dw", "mn"; may be missing
  animacy = "ANIMACY", -- "wos", "zwj", "wěc"; may be missing
  hard = true, -- may be missing
  soft = true, -- may be missing
  mixed = true, -- may be missing
  surname = true, -- may be missing
  istem = true, -- may be missing
  ["-istem"] = true, -- may be missing
  tstem = true, -- may be missing
  nstem = true, -- may be missing
  tech = true, -- may be missing
  foreign = true, -- may be missing
  mostlyindecl = true, -- may be missing
  indecl = true, -- may be missing
  manual = true, -- may be missing
  adj = true, -- may be missing
  decllemma = "DECLENSION-LEMMA", -- may be missing
  declgender = "DECLENSION-GENDER", -- may be missing
  declnumber = "DECLENSION-NUMBER", -- may be missing

  -- The following additional fields are added by other functions:
  orig_lemma = "ORIGINAL-LEMMA", -- as given by the user
  orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
  lemma = "LEMMA", -- `orig_lemma_no_links`, converted to singular form if plural and lowercase if all-uppercase
  forms = {
	SLOT = {
	  {
		form = "FORM",
		footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
	  },
	  ...
	},
	...
  },
  decl = "DECL", -- declension, e.g.
"hard-m"
  vowel_stem = "VOWEL-STEM", -- derived from vowel-ending lemmas
  nonvowel_stem = "NONVOWEL-STEM", -- derived from non-vowel-ending lemmas
}
]=]
-- Parse the text of one angle-bracket spec (including the brackets) into a `base` object. Each dot-separated
-- indicator is classified in turn: slot override, bare footnote, reducible/vowel-alternation pattern, gender, number,
-- animacy, boolean flag, `+` (adjectival), `!` (manual) or one of the `NAME:VALUE` indicators. Throws an error on any
-- unrecognized or duplicated indicator.
local function parse_indicator_spec(angle_bracket_spec)
	local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
	assert(inside)
	local base = {overrides = {}, forms = {}}
	if inside ~= "" then
		local segments = iut.parse_balanced_segment_run(inside, "[", "]")
		local dot_separated_groups = iut.split_alternating_runs_and_strip_spaces(segments, "%.")
		for _, dot_separated_group in ipairs(dot_separated_groups) do
			local part = dot_separated_group[1]
			local case_prefix = usub(part, 1, 3)
			if cases[case_prefix] then
				-- Slot override; one override may apply to several slots.
				local slots, override = parse_override(dot_separated_group)
				for _, slot in ipairs(slots) do
					if base.overrides[slot] then
						error(("Two overrides specified for slot '%s'"):format(slot))
					else
						base.overrides[slot] = {override}
					end
				end
			elseif part == "" then
				-- Footnote(s) standing by themselves.
				if #dot_separated_group == 1 then
					error("Blank indicator: '" .. inside .. "'")
				end
				base.footnotes = fetch_footnotes(dot_separated_group)
			elseif rfind(part, "^[-*#ě]*$") or rfind(part, "^[-*#ě]*,") then
				-- Comma-separated reducible and/or vowel-alternation patterns; each yields one stem set.
				if base.stem_sets then
					error("Can't specify reducible/vowel-alternant indicator twice: '" .. inside .. "'")
				end
				local comma_separated_groups = iut.split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
				local stem_sets = {}
				for _, comma_separated_group in ipairs(comma_separated_groups) do
					local pattern = comma_separated_group[1]
					local reducible, vowelalt, oblique_slots
					if pattern == "-" then
						-- default reducible, no vowel alt
					else
						local before, after
						before, reducible, after = rmatch(pattern, "^(.-)(%-?%*)(.-)$")
						if before then
							-- `*` means reducible, `-*` means explicitly not reducible.
							pattern = before .. after
							reducible = reducible == "*"
						end
						if pattern ~= "" then
							if not rfind(pattern, "^##?ě?$") then
								error("Unrecognized vowel-alternation pattern '" .. pattern .. "', should be one of #, ##, #ě or ##ě: '" .. inside .. "'")
							end
							if pattern == "#ě" or pattern == "##ě" then
								vowelalt = "quant-ě"
							else
								vowelalt = "quant"
							end
							-- `oblique_slots` will be later changed to "all" if the lemma ends in a consonant.
							oblique_slots = "all"
						end
					end
					table.insert(stem_sets, {
						reducible = reducible,
						vowelalt = vowelalt,
						oblique_slots = oblique_slots,
						footnotes = fetch_footnotes(comma_separated_group)
					})
				end
				base.stem_sets = stem_sets
			elseif #dot_separated_group > 1 then
				error("Footnotes only allowed with slot overrides, reducible or vowel alternation specs or by themselves: '" .. table.concat(dot_separated_group) .. "'")
			elseif part == "m" or part == "ž" or part == "n" then
				if base.gender then
					error("Can't specify gender twice: '" .. inside .. "'")
				end
				base.gender = part
			elseif part == "jed" or part == "dw" or part == "mn" then
				if base.number then
					error("Can't specify number twice: '" .. inside .. "'")
				end
				base.number = part
			elseif part == "wos" or part == "zwj" or part == "wěc" then
				if base.animacy then
					error("Can't specify animacy twice: '" .. inside .. "'")
				end
				base.animacy = part
			elseif part == "hard" or part == "soft" or part == "istem" or part == "tstem" or part == "nstem"
				or part == "indecl" or part == "pron" or part == "det" or part == "velar" or part == "vstem"
				or part == "adje" then
				if base[part] then
					error("Can't specify '" .. part .. "' twice: '" .. inside .. "'")
				end
				base[part] = true
			elseif part == "+" then
				if base.adj then
					error("Can't specify '+' twice: '" .. inside .. "'")
				end
				base.adj = true
			elseif part == "!" then
				if base.manual then
					error("Can't specify '!' twice: '" .. inside .. "'")
				end
				base.manual = true
			elseif rfind(part, "^mixedistem:") then
				if base.mixedistem then
					error("Can't specify 'mixedistem:' twice: '" .. inside .. "'")
				end
				base.mixedistem = rsub(part, "^mixedistem:", "")
			elseif rfind(part, "^decllemma:") then
				if base.decllemma then
					error("Can't specify 'decllemma:' twice: '" .. inside .. "'")
				end
				base.decllemma = rsub(part, "^decllemma:", "")
			elseif rfind(part, "^declgender:") then
				if base.declgender then
					error("Can't specify 'declgender:' twice: '" .. inside .. "'")
				end
				base.declgender = rsub(part, "^declgender:", "")
			elseif rfind(part, "^declnumber:") then
				if base.declnumber then
					error("Can't specify 'declnumber:' twice: '" .. inside .. "'")
				end
				base.declnumber = rsub(part, "^declnumber:", "")
			else
				error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
			end
		end
	end
	return base
end

-- Return true if `base` is none of adjectival, pronoun, determiner or numeral.
local function is_regular_noun(base)
	return not base.adj and not base.pron and not base.det and not base.num
end

-- Save the user-specified number in `base.actual_number` and, if 'declnumber:' was given, decline according to that
-- number instead. Throws an error if the 'declnumber:' value is not one of the recognized number codes.
local function process_declnumber(base)
	base.actual_number = base.number
	if base.declnumber then
		if base.declnumber == "jed" or base.declnumber == "dw" or base.declnumber == "mn" then
			base.number = base.declnumber
		else
			-- The message lists the codes this check actually accepts.
			error(("Unrecognized value '%s' for 'declnumber', should be 'jed', 'dw' or 'mn'"):format(base.declnumber))
		end
	end
end

-- Fill in default gender/number/animacy for a single word spec and reject invalid indicator combinations.
-- Pronouns, determiners and numerals delegate to their own default setters; adjectival specs get no defaults here.
local function set_defaults_and_check_bad_indicators(base)
	-- Set default values.
	local regular_noun = is_regular_noun(base)
	if base.pron then
		set_pron_defaults(base)
	elseif base.det then
		set_det_defaults(base)
	elseif base.num then
		set_num_defaults(base)
	elseif not base.adj then
		if not base.gender then
			if base.manual then
				base.gender = "none"
			else
				error("For nouns, gender must be specified")
			end
		end
		base.number = base.number or "allthree"
		process_declnumber(base)
		base.animacy = base.animacy or "wěc"
		-- Remember the real gender/animacy before 'declgender:' possibly replaces them for declension purposes.
		base.actual_gender = base.gender
		base.actual_animacy = base.animacy
		if base.declgender then
			if base.declgender == "m-an" then
				base.gender = "m"
				base.animacy = "wos"
			elseif base.declgender == "m-in" then
				base.gender = "m"
				base.animacy = "wěc"
			elseif base.declgender == "ž" or base.declgender == "n" then
				base.gender = base.declgender
			else
				-- The message lists the codes this check actually accepts.
				error(("Unrecognized value '%s' for 'declgender', should be 'm-an', 'm-in', 'ž' or 'n'"):format(base.declgender))
			end
		end
	end

	-- Check for bad indicator combinations.
	if (base.hard and 1 or 0) + (base.soft and 1 or 0) > 1 then
		error("At most one of 'hard' or 'soft' can be specified")
	end
	if base.istem and base["-istem"] then
		error("'istem' and '-istem' cannot be specified together")
	end
	if (base.istem or base["-istem"]) then
		if not regular_noun then
			error("'istem' and '-istem' can only be specified with regular nouns")
		end
	end
	if base.declgender and not regular_noun then
		error("'declgender' can only be specified with regular nouns")
	end
end

-- Apply set_defaults_and_check_bad_indicators() to every word spec and record on the top-level spec which kinds of
-- words (pronoun / determiner / numeral and their complements) were seen.
local function set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
	local is_multiword = #alternant_multiword_spec.alternant_or_word_specs > 1
	iut.map_word_specs(alternant_multiword_spec, function(base)
		set_defaults_and_check_bad_indicators(base)
		base.multiword = is_multiword -- FIXME: not currently used; consider deleting
		alternant_multiword_spec.has_clitic = alternant_multiword_spec.has_clitic or base.has_clitic
		if base.pron then
			alternant_multiword_spec.saw_pron = true
		else
			alternant_multiword_spec.saw_non_pron = true
		end
		if base.det then
			alternant_multiword_spec.saw_det = true
		else
			alternant_multiword_spec.saw_non_det = true
		end
		if base.num then
			alternant_multiword_spec.saw_num = true
		else
			alternant_multiword_spec.saw_non_num = true
		end
	end)
end

-- Replace a word-final palatalized consonant (or cluster) with its presumed unpalatalized source; the first matching
-- replacement wins and `word` is returned unchanged if none matches. The cluster replacements št -> sk and čt -> ck
-- are only tried when `is_adjective` is truthy. `base` is not used.
local function undo_second_palatalization(base, word, is_adjective)
	local function try(from, to)
		local stem = rmatch(word, "^(.*)" .. from .. "$")
		if stem then
			return stem .. to
		end
		return nil
	end
	return is_adjective and try("št", "sk") or
		is_adjective and try("čt", "ck") or
		try("c", "k") or -- FIXME, this could be wrong and c correct
		try("ř", "r") or
		try("z", "h") or -- FIXME, this could be wrong and z or g correct
		try("š", "ch") or
		word
end

-- For a plural-only lemma, synthesize a likely singular lemma. It doesn't have to be
-- theoretically correct as long as it generates all the correct plural forms.
-- Replaces `base.lemma` (a plural form) with a reconstructed singular, based on gender and the plural ending.
-- Creates a default `base.stem_sets` if missing; may set `stems.reducible` and `base.istem` as side effects. Throws
-- an error if the ending is not recognized for the gender, or if different stem sets would yield different lemmas.
local function synthesize_singular_lemma(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	local lemma_determined
	-- Loop over all stem sets in case the user specified multiple ones (e.g. '*,-*'). If we try to reconstruct
	-- different lemmas for different stem sets, we'll throw an error below.
	for _, stems in ipairs(base.stem_sets) do
		local stem, lemma
		-- `while true` is used only so that `break` can serve as an early exit once the lemma is determined.
		while true do
			if base.indecl then
				-- If specified as indeclinable, leave it alone; e.g. 'pesos' indeclinable plural of [[peso]].
				lemma = base.lemma
				break
			elseif base.gender == "m" then
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					if base.soft then
						-- [[Blíženci]] "Gemini"
						-- Since the nominative singular has no ending.
						-- No ending argument is passed; the original passed an undefined global `ending` here, i.e. nil.
						lemma = com.convert_paired_plain_to_palatal(stem)
					else
						lemma = undo_second_palatalization(base, stem)
					end
				else
					stem = rmatch(base.lemma, "^(.*)ové$") or rmatch(base.lemma, "^(.*)[éyě]$") or
						rmatch(base.lemma, "^(.*)ie$")
					if stem then
						-- [[manželé]] "married couple", [[Velšané]] "Welsh people"
						lemma = stem
					else
						error(("Masculine plural-only lemma '%s' should end in -i, -ové or -é"):format(base.lemma))
					end
				end
				if stems.reducible == nil then
					if rfind(lemma, com.cons_c .. "[ck]$") and not com.is_monosyllabic(base.lemma) then
						stems.reducible = true
					end
					if stems.reducible then
						lemma = dereduce(base, lemma)
					end
				end
				break
			elseif base.gender == "ž" then
				stem = rmatch(base.lemma, "^(.*)y$")
				if stem then
					lemma = stem .. "a"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eě]$")
				if stem then
					-- Singular like the plural. Cons-stem feminines like [[dlaň]] "palm (of the hand)" have identical
					-- plurals to soft-stem feminines like [[růže]] (modulo e/ě differences), so we don't need to
					-- reconstruct the former type.
					lemma = base.lemma
					break
				end
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					-- i-stems.
					lemma = stem
					base.istem = true
					break
				end
				error(("Feminine plural-only lemma '%s' should end in -y, -ě, -e or -i"):format(base.lemma))
			elseif base.gender == "n" then
				-- -ata nouns like [[slůně]] "baby elephant" nom pl 'slůňata' are declined in the plural same as if
				-- the singular were 'slůňato' so we don't have to worry about them.
				stem = rmatch(base.lemma, "^(.*)a$")
				if stem then
					lemma = stem .. "o"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eěí]$")
				if stem then
					-- singular lemma also in -e, -ě or -í; e.g. [[věčná loviště]] "[[happy hunting ground]]"
					lemma = base.lemma
					break
				end
				error(("Neuter plural-only lemma '%s' should end in -a, -í, -ě or -e"):format(base.lemma))
			else
				error(("Internal error: Unrecognized gender '%s'"):format(base.gender))
			end
		end
		if lemma_determined and lemma_determined ~= lemma then
			error(("Attempt to set two different singular lemmas '%s' and '%s'"):format(lemma_determined, lemma))
		end
		lemma_determined = lemma
	end
	base.lemma = lemma_determined
end

-- For an adjectival lemma, synthesize the masc singular form.
-- Sets `base.decl` ("indecl" or "adj"), may rewrite `base.lemma`, creates a default `base.stem_sets` if missing, and
-- stores the stem found below as both `vowel_stem` and `nonvowel_stem` of every stem set. Throws an error if the
-- lemma's ending does not fit the gender/number/animacy.
local function synthesize_adj_lemma(base)
	local stem
	if base.indecl then
		base.decl = "indecl"
		stem = base.lemma
	else
		-- Convert a possessive stem in -ov to -ův; other stems are returned unchanged.
		local function sub_ov(stem)
			stem = stem:gsub("ov$", "ův")
			return stem
		end
		-- `while true` is used only so that `break` can serve as an early exit once the stem is determined.
		while true do
			if base.number == "mn" then
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						if base.soft then
							-- nothing to do
						else
							if base.animacy ~= "wos" then
								error(("Masculine plural-only adjectival lemma '%s' ending in -í can only be animate unless '.soft' is specified"):format(base.lemma))
							end
							base.lemma = undo_second_palatalization(base, stem, "is adjective") .. "ý"
						end
						break
					end
					stem = rmatch(base.lemma, "^(.*)é$")
					if stem then
						if base.animacy == "wos" then
							error(("Masculine plural-only adjectival lemma '%s' ending in -é must be inanimate"):format(base.lemma))
						end
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)i$") or rmatch(base.lemma, "^(.*in)i$")
					if stem then
						if base.animacy ~= "wos" then
							error(("Masculine plural-only possessive adjectival lemma '%s' ending in -i must be animate"):format(base.lemma))
						end
						base.lemma = sub_ov(stem)
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$")
					if stem then
						if base.animacy == "wos" then
							error(("Masculine plural-only possessive adjectival lemma '%s' ending in -y must be inanimate"):format(base.lemma))
						end
						base.lemma = sub_ov(stem)
						break
					end
					if base.animacy == "wos" then
						error(("Animate masculine plural-only adjectival lemma '%s' should end in -í, -ovi or -ini"):format(base.lemma))
					elseif base.soft then
						error(("Soft masculine plural-only adjectival lemma '%s' should end in -í"):format(base.lemma))
					else
						error(("Inanimate masculine plural-only adjectival lemma '%s' should end in -é, -ovy or -iny"):format(base.lemma))
					end
				elseif base.gender == "ž" then
					stem = rmatch(base.lemma, "^(.*)é$") -- hard adjective
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$") -- possessive adjective
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Feminine plural-only adjectival lemma '%s' should end in -é, -í, -ovy or -iny"):format(base.lemma))
				else
					stem = rmatch(base.lemma, "^(.*)á$") -- hard adjective
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$") -- possessive adjective
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Neuter plural-only adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
				end
			else
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)[ýí]$") or rmatch(base.lemma, "^(.*)ův$") or
						rmatch(base.lemma, "^(.*)in$")
					if stem then
						break
					end
					error(("Masculine adjectival lemma '%s' should end in -ý, -í, -ův or -in"):format(base.lemma))
				elseif base.gender == "ž" then
					stem = rmatch(base.lemma, "^(.*)á$")
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$")
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Feminine adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
				else
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)o$") or rmatch(base.lemma, "^(.*in)o$")
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Neuter adjectival lemma '%s' should end in -é, -í, -ovo or -ino"):format(base.lemma))
				end
			end
		end
		base.decl = "adj"
	end

	-- Now set the stem sets if not given.
	if not base.stem_sets then
		base.stem_sets = {{reducible = false}}
	end
	for _, stems in ipairs(base.stem_sets) do
		-- Set the stems.
		stems.vowel_stem = stem
		stems.nonvowel_stem = stem
	end
end

-- Determine the declension based on the lemma, gender and number. The declension is set in base.decl. In the process,
-- we set either base.vowel_stem (if the lemma ends in a vowel) or base.nonvowel_stem (if the lemma does not end in a
-- vowel), which is used by determine_stems().
-- In some cases (specifically with certain foreign nouns), we set
-- base.lemma to a new value; this is as if the user specified 'decllemma:'.
-- Throws an error for lemma/gender combinations that are not supported.
local function determine_declension(base)
	if base.indecl then
		base.decl = "indecl"
		base.nonvowel_stem = base.lemma
		return
	end

	-- Declared local so that the stem does not leak into the global environment.
	local stem

	-- Determine declension
	stem = rmatch(base.lemma, "^(.*)a$")
	if stem then
		if base.gender == "m" then
			if base.animacy ~= "wos" then
				error("Masculine lemma in -a must be animate")
			end
			base.decl = "a-m"
		elseif base.gender == "ž" then
			-- Explicit indicators take precedence over what the final consonant suggests.
			if base.hard then
				base.decl = "hard-f"
			elseif base.soft then
				base.decl = "soft-f"
			elseif base.adje then
				base.decl = "adje-f"
			elseif rfind(base.lemma, com.velar_c .. "a$") then
				base.decl = "velar-f"
			elseif rfind(base.lemma, "[czs]" .. "a$") then
				base.decl = "czs-f"
			elseif rfind(base.lemma, com.inherently_soft_c .. "a$") then
				base.decl = "soft-f"
			else
				base.decl = "hard-f"
			end
		elseif base.gender == "n" then
			if rfind(stem, "m$") then
				base.decl = "ma-n"
			else
				error("Lemma ending in -a and neuter must end in -ma")
			end
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)e$")
	if stem then
		if base.tstem then
			base.decl = "tstem-n"
		elseif base.adje then
			base.decl = "adje-n"
		else
			base.decl = "soft-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)o$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[maestro]] m.
			base.decl = "o-m"
		elseif base.gender == "ž" then
			-- [[zoo]]; [[Žemaitsko]]?
			error("Feminine nouns in -o are indeclinable; use '.indecl' if needed")
		elseif base.hard then
			base.decl = "hard-n"
		elseif base.tstem then
			base.decl = "tstem-n"
		elseif base.nstem then
			base.decl = "nstem-n"
		elseif rfind(base.lemma, "[czs]" .. "o$") then
			base.decl = "czs-n"
		elseif rfind(base.lemma, com.inherently_soft_c .. "o$") then
			base.decl = "soft-n"
		elseif rfind(base.lemma, com.velar_c .. "o$") then
			base.decl = "velar-n"
		else
			base.decl = "hard-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)[iy]$")
	if stem then
		-- NOTE(review): base.decl is only set here for masculine 'adje' lemmas; other lemmas in -i/-y leave it unset.
		if base.gender == "m" then
			if base.adje then
				base.decl = "adje-m"
			end
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" then
			if base.hard then
				base.decl = "hard-m"
			elseif base.soft then
				base.decl = "soft-m"
			elseif rfind(base.lemma, com.velar_c .. "$") then
				base.decl = "velar-m"
			elseif rfind(base.lemma, "[czs]" .. "$") then
				base.decl = "czs-m"
			elseif rfind(base.lemma, com.inherently_soft_c .. "$") then
				base.decl = "soft-m"
			else
				base.decl = "hard-m"
			end
		elseif base.gender == "ž" then
			if base.vstem then
				base.decl = "v-f"
				-- The stem of a v-stem is the lemma minus final -ej.
				stem = rmatch(base.lemma, "^(.*)ej$")
			elseif base.soft then
				base.decl = "soft-f"
			elseif rfind(base.lemma, "[czs]" .. "$") then
				base.decl = "czs-f"
			else
				base.decl = "soft-f"
			end
		elseif base.gender == "n" then
			if base.foreign then
				stem = rmatch(base.lemma, "^(.*)um$") or rmatch(base.lemma, "^(.*)on$")
				if not stem then
					error("Unrecognized neuter foreign ending, should be -um or -on")
				end
				if base.hard then
					base.decl = "hard-n"
				elseif rfind(stem, "[eiuy]$") then
					base.decl = "semisoft-n"
				else
					base.decl = "hard-n"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem .. "o"
				base.vowel_stem = stem
				return
			else
				error("Neuter nouns ending in a consonant should use '.foreign' or '.decllemma:...'")
			end
		end
		base.nonvowel_stem = stem
		return
	end
	error("Unrecognized ending for lemma: '" .. base.lemma .. "'")
end

-- Determine the default value for the 'reducible' flag.
-- The result is stored in `base.default_reducible`.
local function determine_default_reducible(base)
	-- Nouns in vowels other than -a/o as well as masculine nouns ending in all vowels don't have null endings so not
	-- reducible. Note, we are never called on adjectival nouns.
	if rfind(base.lemma, "[iyuíeě]$") or base.gender == "m" and rfind(base.lemma, "[ao]$") or base.tstem then
		base.default_reducible = false
		return
	end

	local stem
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" and rfind(stem, "e[ck]$") and not com.is_monosyllabic(stem) then
			base.default_reducible = true
		elseif base.gender == "ž" and rfind(stem, "eń$") then
			-- pěseń
			base.default_reducible = true
		else
			base.default_reducible = false
		end
		return
	end

	base.default_reducible = false
end

-- Determine the stems to use for each stem set: vowel and nonvowel stems, for singular
-- and plural. We assume that one of base.vowel_stem or base.nonvowel_stem has been
-- set in determine_declension(), depending on whether the lemma ends in
-- a vowel. We construct all the rest given the reducibility, vowel alternation spec and
-- any explicit stems given. We store the determined stems inside of the stem-set objects
-- in `base.stem_sets`, meaning that if the user gave multiple reducible or vowel-alternation
-- patterns, we will compute multiple sets of stems. The reason is that the stems may vary
-- depending on the reducibility and vowel alternation.
local function determine_stems(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	-- Set default reducible.
	for _, stems in ipairs(base.stem_sets) do
		if stems.reducible == nil then
			stems.reducible = base.default_reducible
		end
	end

	-- Now determine all the stems for each stem set.
	for _, stems in ipairs(base.stem_sets) do
		if base.vowel_stem then
			stems.vowel_stem = base.vowel_stem
			stems.nonvowel_stem = stems.vowel_stem
			-- Apply vowel alternation first in cases like jádro -> jader; apply_vowel_alternation() will throw an error
			-- if the vowel being modified isn't the last vowel in the stem.
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.nonvowel_stem = dereduce(base, stems.nonvowel_stem)
				stems.oblique_nonvowel_stem = dereduce(base, stems.oblique_nonvowel_stem)
			end
		else
			stems.nonvowel_stem = base.nonvowel_stem
			-- The user specified #. E.g. nóc nocy
			if stems.oblique_slots then
				stems.oblique_slots = "all"
			end
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.vowel_stem = com.reduce(base.nonvowel_stem)
				if not stems.vowel_stem then
					error("Unable to reduce stem '" .. base.nonvowel_stem .. "'")
				end
			else
				stems.vowel_stem = base.nonvowel_stem
			end
		end
		stems.oblique_vowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.vowel_stem)
	end
end

-- Work out declension and stems for one word spec, dispatching on its type (pronoun, determiner, numeral,
-- adjectival, manual or regular noun).
local function detect_indicator_spec(base)
	if base.pron then
		determine_pronoun_stems(base)
	elseif base.det then
		determine_determiner_stems(base)
	elseif base.num then
		determine_numeral_stems(base)
	elseif base.adj then
		process_declnumber(base)
		synthesize_adj_lemma(base)
	elseif base.manual then
		if base.stem_sets then
			-- FIXME, maybe this should be allowed?
			error("Reducible and vowel alternation specs cannot be given with manual declensions")
		end
		base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
		base.decl = "manual"
	else
		if base.number == "mn" then
			synthesize_singular_lemma(base)
		end
		determine_declension(base)
		determine_default_reducible(base)
		determine_stems(base)
	end
end

-- Run detect_indicator_spec() on every word spec, collect the sets of genders seen in `sg_genders` / `pl_genders`,
-- and reject specs that mix pronouns, determiners and numerals.
local function detect_all_indicator_specs(alternant_multiword_spec)
	alternant_multiword_spec.sg_genders = {}
	alternant_multiword_spec.pl_genders = {}
	iut.map_word_specs(alternant_multiword_spec, function(base)
		detect_indicator_spec(base)
		if base.number ~= "mn" then
			alternant_multiword_spec.sg_genders[base.actual_gender] = true
		end
		if base.number ~= "jed" then
			local plgender = base.actual_gender
			alternant_multiword_spec.pl_genders[plgender] = true
		end
	end)
	if (alternant_multiword_spec.saw_pron and 1 or 0) + (alternant_multiword_spec.saw_det and 1 or 0) +
		(alternant_multiword_spec.saw_num and 1 or 0) > 1 then
		error("Can't combine pronouns, determiners and/or numerals")
	end
end

local propagate_multiword_properties

-- Propagate `property` within each alternant of `alternant_spec`, then set the property on the alternant spec itself:
-- the common value if all alternants that have one agree, otherwise `mixed_value`.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local seen_property
	for _, multiword_spec in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(multiword_spec, property, mixed_value, nouns_only)
		if seen_property == nil then
			seen_property = multiword_spec[property]
		elseif multiword_spec[property] and seen_property ~= multiword_spec[property] then
			seen_property = mixed_value
		end
	end
	alternant_spec[property] = seen_property
end

-- Propagate `property` sideways within a multiword spec: each "nounal" word passes its value to the words to its
-- left that lack one (back to the previous nounal word), and the last nounal word also passes it to the words to its
-- right. With `nouns_only`, a plain word counts as nounal if it is a regular noun; otherwise if it has the property
-- set. The combined value (or `mixed_value` on disagreement) is stored on the multiword spec.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local seen_property = nil
	local last_seen_nounal_pos = 0
	local word_specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	for i = 1, #word_specs do
		local is_nounal
		if word_specs[i].alternants then
			propagate_alternant_properties(word_specs[i], property, mixed_value)
			is_nounal = not not word_specs[i][property]
		elseif nouns_only then
			is_nounal = is_regular_noun(word_specs[i])
		else
			is_nounal = not not word_specs[i][property]
		end
		if is_nounal then
			if not word_specs[i][property] then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			for j = last_seen_nounal_pos + 1, i - 1 do
				word_specs[j][property] = word_specs[j][property] or word_specs[i][property]
			end
			last_seen_nounal_pos = i
			if seen_property == nil then
				seen_property = word_specs[i][property]
			elseif seen_property ~= word_specs[i][property] then
				seen_property = mixed_value
			end
		end
	end
	if last_seen_nounal_pos > 0 then
		for i = last_seen_nounal_pos + 1, #word_specs do
			word_specs[i][property] = word_specs[i][property] or word_specs[last_seen_nounal_pos][property]
		end
	end
	multiword_spec[property] = seen_property
end

-- Push `property` down from the top-level spec to every word spec that still lacks it, starting from
-- `default_propval`. Also fills in "actual_" .. property wherever it is unset. Throws an error if a word would end
-- up with the value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	-- Return obj[property], first setting it to `default` if unset; also default the "actual_" field.
	local function set_and_fetch(obj, default)
		local retval
		if obj[property] then
			retval = obj[property]
		else
			obj[property] = default
			retval = default
		end
		if not obj["actual_" .. property] then
			obj["actual_" .. property] = retval
		end
		return retval
	end
	local propval1 = set_and_fetch(alternant_multiword_spec, default_propval)
	for _, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		local propval2 = set_and_fetch(alternant_or_word_spec, propval1)
		if alternant_or_word_spec.alternants then
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				local propval3 = set_and_fetch(multiword_spec, propval2)
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					local propval4 = set_and_fetch(word_spec, propval3)
					if propval4 == "mixed" then
						-- FIXME, use clearer error message.
						error("Attempt to assign mixed " .. property .. " to word")
					end
					set_and_fetch(word_spec, propval4)
				end
			end
		else
			if propval2 == "mixed" then
				-- FIXME, use clearer error message.
				error("Attempt to assign mixed " .. property .. " to word")
			end
			set_and_fetch(alternant_or_word_spec, propval2)
		end
	end
end

--[=[
Propagate `property` (one of "animacy", "gender" or "number") from nouns to adjacent
adjectives. We proceed as follows:
1. We assume the properties in question are already set on all nouns. This should happen
   in set_defaults_and_check_bad_indicators().
2. We first propagate properties upwards and sideways. We recurse downwards from the top.
   When we encounter a multiword spec, we proceed left to right looking for a noun.
   When we find a noun, we fetch its property (recursing if the noun is an alternant),
   and propagate it to any adjectives to its left, up to the next noun to the left.
   When we have processed the last noun, we also propagate its property value to any
   adjectives to the right (to handle e.g. [[anděl strážný]] "guardian angel", where the
   adjective [[strážný]] should inherit the 'masculine' and 'animate' properties of [[anděl]]).
   Finally, we set the property value for the multiword spec itself by combining all the
   non-nil properties of the individual elements. If all non-nil properties have the same value,
   the result is that value, otherwise it is `mixed_value` (which is "mixed" for animacy and
   gender, but "allthree" for number).
3. When we encounter an alternant spec in this process, we recursively process each
   alternant (which is a multiword spec) using the previous step, and combine any
   non-nil properties we encounter the same way as for multiword specs.
4. The effect of steps 2 and 3 is to set the property of each alternant and multiword spec
   based on its children or its neighbors.
]=]
-- Runs the three propagation passes for `property` in order: sideways/upward counting only regular nouns as
-- sources, sideways/upward again counting any word that has the property, and finally downward using
-- `default_propval` for anything still unset.
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	local spec = alternant_multiword_spec
	propagate_multiword_properties(spec, property, mixed_value, "nouns only")
	propagate_multiword_properties(spec, property, mixed_value, false)
	propagate_properties_downward(spec, property, default_propval)
end

-- Records the position of the noun in `first_noun`. For an alternant, each of its multiword specs gets `first_noun`
-- set to the index of its first regular noun, and the top-level `first_noun` is set to the alternant's index if any
-- of them has one (scanning then continues). At the first top-level word that is a regular noun, the top-level
-- `first_noun` is set to its index and scanning stops.
local function determine_noun_status(alternant_multiword_spec)
	-- Index of the first regular noun in `word_specs`, or nil if there is none.
	local function first_regular_noun_index(word_specs)
		for pos, word_spec in ipairs(word_specs) do
			if is_regular_noun(word_spec) then
				return pos
			end
		end
		return nil
	end

	for index, spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if not spec.alternants then
			if is_regular_noun(spec) then
				alternant_multiword_spec.first_noun = index
				return
			end
		else
			local found_noun = false
			for _, multiword_spec in ipairs(spec.alternants) do
				local pos = first_regular_noun_index(multiword_spec.word_specs)
				if pos then
					multiword_spec.first_noun = pos
					found_noun = true
				end
			end
			if found_noun then
				alternant_multiword_spec.first_noun = index
			end
		end
	end
end

-- Set the part of speech based on properties of the individual words.
-- An explicit `pos` argument wins; otherwise the part of speech is "pronoun", "determiner" or "numeral" when only
-- words of that kind were seen, else "noun". The plural form is stored in `plpos`.
local function set_pos(alternant_multiword_spec)
	if alternant_multiword_spec.args.pos then
		alternant_multiword_spec.pos = alternant_multiword_spec.args.pos
	elseif alternant_multiword_spec.saw_pron and not alternant_multiword_spec.saw_non_pron then
		alternant_multiword_spec.pos = "pronoun"
	elseif alternant_multiword_spec.saw_det and not alternant_multiword_spec.saw_non_det then
		alternant_multiword_spec.pos = "determiner"
	elseif alternant_multiword_spec.saw_num and not alternant_multiword_spec.saw_non_num then
		alternant_multiword_spec.pos = "numeral"
	else
		alternant_multiword_spec.pos = "noun"
	end
	alternant_multiword_spec.plpos = require("Modul:string utilities").pluralize(alternant_multiword_spec.pos)
end

-- For every word spec: default a blank lemma to `pagename`, record the original lemma with and without links, and
-- set `base.lemma` to the 'decllemma:' value if given, else to the (possibly lowercased) link-free lemma.
local function normalize_all_lemmas(alternant_multiword_spec, pagename)
	iut.map_word_specs(alternant_multiword_spec, function(base)
		if base.lemma == "" then
			base.lemma = pagename
		end
		base.orig_lemma = base.lemma
		base.orig_lemma_no_links = m_links.remove_links(base.lemma)
		local lemma = base.orig_lemma_no_links
		-- If the lemma is all-uppercase, lowercase it but note this, so that later in combine_stem_ending() we convert it
		-- back to uppercase. This allows us to handle all-uppercase acronyms without a lot of extra complexity.
		-- FIXME: This may not make sense at all.
		if uupper(lemma) == lemma then
			base.all_uppercase = true
			lemma = ulower(lemma)
		end
		base.actual_lemma = lemma
		base.lemma = base.decllemma or lemma
	end)
end

-- Generate all forms for one word spec: run the declension function for each stem set, apply derived slots and
-- overrides, fill in the syncretic dual slots, and, when 'declnumber:' made the declined number differ from the
-- actual number, copy the declined number's forms to the other numbers and erase the numbers the word lacks.
local function decline_noun(base)
	for _, stems in ipairs(base.stem_sets) do
		if not decls[base.decl] then
			-- tostring() so that an unset declension gives this message instead of a nil-concatenation error.
			error("Internal error: Unrecognized declension type '" .. tostring(base.decl) .. "'")
		end
		decls[base.decl](base, stems)
	end
	handle_derived_slots_and_overrides(base)
	local function copy(from_slot, to_slot)
		base.forms[to_slot] = base.forms[from_slot]
	end
	if base.gender ~= "m" then
		copy("nom_d", "acc_d")
	end
	copy("nom_d", "voc_d")
	copy("dat_d", "loc_d")
	copy("dat_d", "ins_d")
	if base.actual_number ~= base.number then
		local source_num = base.number == "jed" and "_s" or base.number == "dw" and "_d" or "_p"
		-- There are two destination numbers, so they are lists and must be iterated; concatenating the list itself
		-- to a string raises an error.
		local dest_nums = base.number == "jed" and {"_p", "_d"} or base.number == "dw" and {"_s", "_p"} or {"_s", "_d"}
		for _, dest_num in ipairs(dest_nums) do
			for case, _ in pairs(cases) do
				copy(case .. source_num, case .. dest_num)
			end
			copy("nom" .. source_num .. "_linked", "nom" .. dest_num .. "_linked")
		end
		if base.actual_number ~= "allthree" then
			local erase_nums = base.actual_number == "jed" and {"_d", "_p"} or
				base.actual_number == "dw" and {"_s", "_p"} or {"_s", "_d"}
			for _, erase_num in ipairs(erase_nums) do
				for case, _ in pairs(cases) do
					base.forms[case .. erase_num] = nil
				end
				base.forms["nom" .. erase_num .. "_linked"] = nil
			end
		end
	end
end

-- Currently always returns nil; the variant detection is disabled (see the FIXME below).
local function get_variants(form)
	return nil
	--[=[
	FIXME
	return
		form:find(com.VAR1) and "var1" or
		form:find(com.VAR2) and "var2" or
		form:find(com.VAR3) and "var3" or
		nil
	]=]
end

-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
-- The results are stored in `alternant_multiword_spec.categories` and `alternant_multiword_spec.annotation`.
local function compute_categories_and_annotation(alternant_multiword_spec)
	local all_cats = {}
	local function insert(cattype)
		m_table.insertIfNot(all_cats, "Upper Sorbian " .. cattype)
	end
	if alternant_multiword_spec.pos == "noun" then
		if alternant_multiword_spec.actual_number == "jed" then
			insert("uncountable nouns")
		elseif alternant_multiword_spec.actual_number == "dw" then
			insert("dualia tantum")
		elseif alternant_multiword_spec.actual_number == "mn" then
			insert("pluralia tantum")
		end
	end
	local annparts = {}
	local decldescs = {}
	local vowelalts = {}
	local foreign = {}
	local irregs = {}
	local stemspecs = {}
	local reducible = nil

	-- Build a description such as "masculine personal" from gender and animacy codes. Unknown codes contribute
	-- nothing, so the result may be an empty string.
	local function get_genanim(gender, animacy)
		-- Keyed by the codes this module uses ("m", "ž", "n" / "wos", "zwj", "wěc"); the keys "f", "pr", "anml"
		-- and "inan" are kept for compatibility.
		local gender_code_to_desc = {
			m = "masculine",
			["ž"] = "feminine",
			f = "feminine",
			n = "neuter",
		}
		local animacy_code_to_desc = {
			wos = "personal",
			zwj = "animal",
			["wěc"] = "inanimate",
			pr = "personal",
			anml = "animal",
			inan = "inanimate",
		}
		local descs = {}
		local gender_desc = gender_code_to_desc[gender]
		if gender_desc then
			table.insert(descs, gender_desc)
		end
		if gender ~= "ž" and gender ~= "n" then
			-- masculine or "none" (e.g. certain pronouns and numerals)
			local animacy_desc = animacy_code_to_desc[animacy]
			if animacy_desc then
				table.insert(descs, animacy_desc)
			end
		end
		return table.concat(descs, " ")
	end

	-- Collapse runs of spaces and strip leading/trailing whitespace.
	local function trim(text)
		text = text:gsub(" +", " ")
		return mw.text.trim(text)
	end

	-- Add the categories and annotation parts contributed by a single word spec.
	local function do_word_spec(base)
		local actual_genanim = get_genanim(base.actual_gender, base.actual_animacy)
		local genanim = actual_genanim
		if base.actual_gender == "m" then
			insert(actual_genanim .. " " .. alternant_multiword_spec.plpos)
		end
		for _, stems in ipairs(base.stem_sets) do
			local props = declprops[base.decl]
			local cats = props.cat
			if type(cats) == "function" then
				cats = cats(base, stems)
			end
			if type(cats) == "string" then
				cats = {cats}
			end
			local default_desc
			for i, cat in ipairs(cats) do
				-- Expand the GENPOS / GENDER / POS placeholders; a category without POS gets " POS" appended.
				cat = cat:gsub("GENPOS", "GENDER POS")
				if not cat:find("POS") then
					cat = cat .. " POS"
				end
				if i == #cats then
					default_desc = cat:gsub(" POS", "")
				end
				cat = cat:gsub("GENDER", actual_genanim)
				cat = cat:gsub("POS", alternant_multiword_spec.plpos)
				-- Need to trim `cat` because actual_genanim may be an empty string.
				insert(trim(cat))
			end

			local desc = props.desc
			if type(desc) == "function" then
				desc = desc(base, stems)
			end
			desc = desc or default_desc
			desc = desc:gsub("GENDER", genanim)
			-- Need to trim `desc` because genanim may be an empty string.
			m_table.insertIfNot(decldescs, trim(desc))

			local vowelalt
			if stems.vowelalt == "quant" then
				vowelalt = "quant-alt"
				insert("nouns with quantitative vowel alternation")
			elseif stems.vowelalt == "quant-ě" then
				vowelalt = "í-ě-alt"
				insert("nouns with í-ě alternation")
			end
			if vowelalt then
				m_table.insertIfNot(vowelalts, vowelalt)
			end
			if reducible == nil then
				reducible = stems.reducible
			end
			if stems.reducible then
				insert("nouns with reducible stem")
			end
			if base.foreign then
				m_table.insertIfNot(foreign, "foreign")
				if not base.decllemma then
					-- NOTE: there are nouns that use both 'foreign' and 'decllemma', e.g. [[Zeus]].
					insert("nouns with regular foreign declension")
				end
			end
			-- User-specified 'decllemma:' indicates irregular stem. Don't consider foreign nouns in -us/-os/-es, -um/-on or
			-- silent -e (e.g. [[software]]) where this ending is simply dropped in oblique and plural forms as irregular;
			-- there are too many of these and they are already categorized above as 'nouns with regular foreign declension'.
			if base.decllemma then
				m_table.insertIfNot(irregs, "irreg-stem")
				insert("nouns with irregular stem")
			end
			m_table.insertIfNot(stemspecs, stems.vowel_stem)
		end
	end

	-- Only the word at `first_noun` (default: the first word) determines the categories and annotation.
	local key_entry = alternant_multiword_spec.first_noun or 1
	if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
		local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
		if alternant_or_word_spec.alternants then
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				key_entry = multiword_spec.first_noun or 1
				if #multiword_spec.word_specs >= key_entry then
					do_word_spec(multiword_spec.word_specs[key_entry])
				end
			end
		else
			do_word_spec(alternant_or_word_spec)
		end
	end
	if alternant_multiword_spec.actual_number == "jed" or alternant_multiword_spec.actual_number == "mn" or
		alternant_multiword_spec.actual_number == "dw" then
		-- not "allthree" or "none" (for [[sebe]])
		table.insert(annparts, alternant_multiword_spec.actual_number == "jed" and "sg-only" or
			alternant_multiword_spec.actual_number == "dw" and "du-only" or "pl-only")
	end
	if #decldescs == 0 then
		table.insert(annparts, "indecl")
	else
		table.insert(annparts, table.concat(decldescs, " // "))
	end
	if #vowelalts > 0 then
		table.insert(annparts, table.concat(vowelalts, "/"))
	end
	if reducible == "mixed" then
		table.insert(annparts, "mixed-reducible")
	elseif reducible then
		table.insert(annparts, "reducible")
	end
	if #foreign > 0 then
		table.insert(annparts, table.concat(foreign, " // "))
	end
	if #irregs > 0 then
		table.insert(annparts, table.concat(irregs, " // "))
	end
	alternant_multiword_spec.annotation = table.concat(annparts, " ")
	if #stemspecs > 1 then
		insert("nouns with multiple stems")
	end
	if alternant_multiword_spec.actual_number == "allthree" and not m_table.deepEquals(alternant_multiword_spec.sg_genders, alternant_multiword_spec.pl_genders) then
		insert("nouns that change gender in the plural")
	end
	alternant_multiword_spec.categories = all_cats
end

local function show_forms(alternant_multiword_spec)
-- (Body of show_forms(); the `local function show_forms(alternant_multiword_spec)` header is on the preceding line.)
	-- Collect the lemma(s) from the first potential lemma slot that has any forms.
	local lemmas = {}
	for _, slot in ipairs(potential_lemma_slots) do
		if alternant_multiword_spec.forms[slot] then
			for _, formobj in ipairs(alternant_multiword_spec.forms[slot]) do
				-- FIXME, now can support footnotes as qualifiers in headwords?
				table.insert(lemmas, formobj.form)
			end
			break
		end
	end
	local props = {
		lemmas = lemmas,
		slot_table = alternant_multiword_spec.output_noun_slots,
		lang = lang,
		canonicalize = function(form)
			-- return com.remove_variant_codes(form)
			return form
		end,
	}
	iut.show_forms(alternant_multiword_spec.forms, props)
end


-- Build the wikitext of the declension table for `alternant_multiword_spec`.
--
-- Reads `.forms`, `.title`, `.annotation`, `.actual_number` and `.has_clitic` from the spec, stores `title`,
-- `annotation` and `notes_clause` into `.forms`, and returns the table template with all `{slot}` placeholders
-- substituted through m_string_utilities.format().
local function make_table(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms

	-- Opening markup shared by every table variant; `min_width` (in em) replaces MINWIDTH.
	local function template_prelude(min_width)
		return rsub([=[
<div>
<div class="NavFrame" style="max-width: MINWIDTHem">
<div class="NavHead" style="background:#eff7ff">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:100%;display:table" class="inflection-table"
|-
]=], "MINWIDTH", min_width)
	end

	-- Closing markup shared by every table variant.
	local function template_postlude()
		return [=[
|{\cl}{notes_clause}</div></div></div>]=]
	end

	-- Full three-number table (singular / dual / plural).
	local table_spec_allthree = template_prelude("45") .. [=[
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | jednota
! style="background:#d9ebff" | dwojota
! style="background:#d9ebff" | mnohota
|-
!style="background:#eff7ff"|mjenowak
| {nom_s}
| {nom_d}
| {nom_p}
|-
!style="background:#eff7ff"|rodźak
| {gen_s}
| {gen_d}
| {gen_p}
|-
!style="background:#eff7ff"|dawak
| {dat_s}
| {dat_d}
| {dat_p}
|-
!style="background:#eff7ff"|žadak
| {acc_s}
| {acc_d}
| {acc_p}
|-
!style="background:#eff7ff"|posrědnik
| {ins_s}
| {ins_d}
| {ins_p}
|-
!style="background:#eff7ff"|měsćak
| {loc_s}
| {loc_d}
| {loc_p}
|-
!style="background:#eff7ff"|wołak
| {voc_s}
| {voc_d}
| {voc_p}
]=] .. template_postlude()

	-- Single-column table. `number` is the column heading; `numcode` is the slot suffix ("s", "d" or "p") that
	-- replaces CODE in the placeholders.
	local function get_table_spec_one_number(number, numcode)
		local table_spec_one_number = [=[
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | NUMBER
|-
!style="background:#eff7ff"|mjenowak
| {nom_CODE}
|-
!style="background:#eff7ff"|rodźak
| {gen_CODE}
|-
!style="background:#eff7ff"|dawak
| {dat_CODE}
|-
!style="background:#eff7ff"|žadak
| {acc_CODE}
|-
!style="background:#eff7ff"|posrědnik
| {ins_CODE}
|-
!style="background:#eff7ff"|měsćak
| {loc_CODE}
|-
!style="background:#eff7ff"|wołak
| {voc_CODE}
]=]
		return template_prelude("30") .. table_spec_one_number:gsub("NUMBER", number):gsub("CODE", numcode) ..
			template_postlude()
	end

	-- Single-number table with an extra column for clitic genitive/dative/accusative forms.
	local function get_table_spec_one_number_clitic(number, numcode)
		local table_spec_one_number_clitic = [=[
! rowspan=2 style="width:33%;background:#d9ebff"|
! colspan=2 style="background:#d9ebff" | NUMBER
|-
! style="width:33%;background:#d9ebff" | stressed
! style="background:#d9ebff" | clitic
|-
!style="background:#eff7ff"|mjenowak
| colspan=2 | {nom_CODE}
|-
!style="background:#eff7ff"|rodźak
| {gen_CODE}
| {clitic_gen_CODE}
|-
!style="background:#eff7ff"|dawak
| {dat_CODE}
| {clitic_dat_CODE}
|-
!style="background:#eff7ff"|žadak
| {acc_CODE}
| {clitic_acc_CODE}
|-
!style="background:#eff7ff"|posrědnik
| colspan=2 | {ins_CODE}
|-
!style="background:#eff7ff"|měsćak
| colspan=2 | {loc_CODE}
|-
!style="background:#eff7ff"|wołak
| colspan=2 | {voc_CODE}
]=]
		return template_prelude("40") ..
			table_spec_one_number_clitic:gsub("NUMBER", number):gsub("CODE", numcode) ..
			template_postlude()
	end

	local notes_template = [=[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]

	if alternant_multiword_spec.title then
		forms.title = alternant_multiword_spec.title
	else
		forms.title = 'Skłonjowanje <i lang="hsb">' .. forms.lemma .. '</i>'
	end

	local annotation = alternant_multiword_spec.annotation
	if annotation == "" then
		forms.annotation = ""
	else
		forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
	end

	-- `numcode` must be the slot-name suffix used throughout the module (nom_s, nom_d, nom_p, ...), not the
	-- number keyword itself; otherwise the placeholders ({nom_jed} etc.) name slots that do not exist.
	local actual_number = alternant_multiword_spec.actual_number
	local number, numcode
	if actual_number == "jed" then
		number, numcode = "jednota", "s"
	elseif actual_number == "dw" then
		number, numcode = "dwojota", "d"
	elseif actual_number == "mn" then
		number, numcode = "mnohota", "p"
	elseif actual_number == "none" then -- used for [[sebe]]
		number, numcode = "", "s"
	elseif actual_number ~= "allthree" then
		error("Internal error: Unrecognized actual_number '" .. tostring(actual_number) .. "'")
	end

	local table_spec =
		actual_number == "allthree" and table_spec_allthree or
		alternant_multiword_spec.has_clitic and get_table_spec_one_number_clitic(number, numcode) or
		get_table_spec_one_number(number, numcode)
	forms.notes_clause = forms.footnote ~= "" and
		m_string_utilities.format(notes_template, forms) or ""
	return m_string_utilities.format(table_spec, forms)
end


-- Compute the list of gender specs for the headword, one per distinct word spec, in the form
-- GENDER-ANIMACY[-NUMBER]. The internal animacy code "wěc" is emitted as "in"; "-p" is appended for
-- plural-only ("mn") and "-d" for dual-only ("dw") nouns.
local function compute_headword_genders(alternant_multiword_spec)
	local genders = {}
	local number
	if alternant_multiword_spec.actual_number == "mn" then
		number = "-p"
	elseif alternant_multiword_spec.actual_number == "dw" then
		number = "-d"
	else
		number = ""
	end
	iut.map_word_specs(alternant_multiword_spec, function(base)
		local animacy = base.animacy
		if animacy == "wěc" then
			animacy = "in"
		end
		m_table.insertIfNot(genders, base.gender .. "-" .. animacy .. number)
	end)
	return genders
end


-- Externally callable function to parse and decline a noun given user-specified arguments.
-- Return value is ALTERNANT_MULTIWORD_SPEC, an object where the declined forms are in
-- `ALTERNANT_MULTIWORD_SPEC.forms` for each slot. If there are no values for a slot, the
-- slot key will be missing. The value for a given slot is a list of objects
-- {form=FORM, footnotes=FOOTNOTES}.
function export.do_generate_forms(parent_args, from_headword) local params = { [1] = {required = true, default = "žona<f>"}, title = {}, pagename = {}, json = {type = "boolean"}, pos = {}, } if from_headword then params["head"] = {list = true} params["lemma"] = {list = true} params["g"] = {list = true} params["ž"] = {list = true} params["m"] = {list = true} params["adj"] = {list = true} params["dim"] = {list = true} params["id"] = {} end local args = m_para.process(parent_args, params) local parse_props = { parse_indicator_spec = parse_indicator_spec, angle_brackets_omittable = true, allow_blank_lemma = true, } local alternant_multiword_spec = iut.parse_inflected_text(args[1], parse_props) alternant_multiword_spec.title = args.title alternant_multiword_spec.args = args local pagename = args.pagename or from_headword and args.head[1] or mw.title.getCurrentTitle().subpageText normalize_all_lemmas(alternant_multiword_spec, pagename) set_all_defaults_and_check_bad_indicators(alternant_multiword_spec) -- These need to happen before detect_all_indicator_specs() so that adjectives get their genders and numbers set -- appropriately, which are needed to correctly synthesize the adjective lemma. propagate_properties(alternant_multiword_spec, "animacy", "wěc", "mixed") propagate_properties(alternant_multiword_spec, "number", "allthree", "allthree") -- FIXME, the default value (third param) used to be 'm' with a comment indicating that this applied only to -- plural adjectives, where it didn't matter; but here, plural adjectives are distinguished for gender and -- animacy. Make sure 'mixed' works. propagate_properties(alternant_multiword_spec, "gender", "mixed", "mixed") detect_all_indicator_specs(alternant_multiword_spec) -- Propagate 'actual_number' after calling detect_all_indicator_specs(), which sets 'actual_number' for adjectives. 
propagate_properties(alternant_multiword_spec, "actual_number", "allthree", "allthree") determine_noun_status(alternant_multiword_spec) set_pos(alternant_multiword_spec) alternant_multiword_spec.output_noun_slots = get_output_noun_slots(alternant_multiword_spec) local inflect_props = { skip_slot = function(slot) return skip_slot(alternant_multiword_spec.actual_number, slot) end, slot_table = alternant_multiword_spec.output_noun_slots, get_variants = get_variants, inflect_word_spec = decline_noun, } iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props) compute_categories_and_annotation(alternant_multiword_spec) alternant_multiword_spec.genders = compute_headword_genders(alternant_multiword_spec) if args.json then alternant_multiword_spec.args = nil return require("Modul:JSON").toJSON(alternant_multiword_spec) end return alternant_multiword_spec end -- Entry point for {{hsb-ndecl}}. Template-callable function to parse and decline a noun given -- user-specified arguments and generate a displayable table of the declined forms. function export.show(frame) local parent_args = frame:getParent().args local alternant_multiword_spec = export.do_generate_forms(parent_args) if type(alternant_multiword_spec) == "string" then -- JSON return value return alternant_multiword_spec end show_forms(alternant_multiword_spec) return make_table(alternant_multiword_spec) .. require("Modul:utilities").format_categories(alternant_multiword_spec.categories, lang, nil, nil, force_cat) end return export djnaaqe9lokxnp3bd1i8hsjgoin455b 17584 17583 2024-11-22T09:03:42Z Sławobóg 2519 17584 Scribunto text/plain local export = {} --[=[ Authorship: Zhnka ]=] --[=[ TERMINOLOGY: -- "slot" = A particular combination of case/number. Example slot names for nouns are "gen_s" (genitive singular) and "voc_p" (vocative plural). Each slot is filled with zero or more forms. -- "form" = The declined form representing the value of a given slot. 
-- "lemma" = The dictionary form. Generally the nominative masculine singular, but may occasionally be another form
	 if the nominative masculine singular is missing.
]=]

local lang = require("Modul:languages").getByCode("hsb")
local m_table = require("Modul:table")
local m_links = require("Modul:links")
local m_string_utilities = require("Modul:string utilities")
local iut = require("Modul:inflection utilities")
local m_para = require("Modul:parameters")
local com = require("Modul:hsb-common")

local current_title = mw.title.getCurrentTitle()
local NAMESPACE = current_title.nsText
local PAGENAME = current_title.text

local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower

local force_cat = false -- set to true to make categories appear in non-mainspace pages, for testing


-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end


-- version of rsubn() that returns a 2nd argument boolean indicating whether
-- a substitution was made.
local function rsubb(term, foo, bar)
	local retval, nsubs = rsubn(term, foo, bar)
	return retval, nsubs > 0
end


-- Record a tracking hit under "hsb-noun/<track_id>"; always returns true so it can be used inside conditions.
local function track(track_id)
	require("Modul:debug/track")("hsb-noun/" .. track_id)
	return true
end


-- Map from slot name (CASE_NUMBER, with number suffix s = singular, d = dual, p = plural) to the
-- accelerator tag string for that slot.
local output_noun_slots = {
	nom_s = "nom|s",
	gen_s = "gen|s",
	dat_s = "dat|s",
	acc_s = "acc|s",
	voc_s = "voc|s",
	loc_s = "loc|s",
	ins_s = "ins|s",
	nom_d = "nom|d",
	gen_d = "gen|d",
	dat_d = "dat|d",
	acc_d = "acc|d",
	voc_d = "voc|d",
	loc_d = "loc|d",
	ins_d = "ins|d",
	nom_p = "nom|p",
	gen_p = "gen|p",
	dat_p = "dat|p",
	acc_p = "acc|p",
	voc_p = "voc|p",
	loc_p = "loc|p",
	ins_p = "ins|p",
}


-- Return the slot table to use for `alternant_multiword_spec`. For nouns that do not have all three numbers,
-- the number tag is dropped from every accelerator string (including the dual "|d"). A fresh table is returned
-- in that case so the module-level table stays intact if the module is invoked more than once in the same Lua
-- state.
local function get_output_noun_slots(alternant_multiword_spec)
	if alternant_multiword_spec.actual_number == "allthree" then
		return output_noun_slots
	end
	local stripped_slots = {}
	for slot, accel_form in pairs(output_noun_slots) do
		-- Outer parentheses discard gsub()'s second return value (the substitution count).
		stripped_slots[slot] = (accel_form:gsub("|[sdp]$", ""))
	end
	return stripped_slots
end


local potential_lemma_slots = {"nom_s", "nom_p", "gen_s"}


local cases = {
	nom = true,
	gen = true,
	dat = true,
	acc = true,
	voc = true,
	loc = true,
	ins = true,
}


local clitic_cases = {
	gen = true,
	dat = true,
	acc = true,
}


-- Wrapper around com.dereduce() that throws an error if the stem cannot be dereduced.
local function dereduce(base, stem)
	local dereduced_stem = com.dereduce(base, stem)
	if not dereduced_stem then
		error("Unable to dereduce stem '" .. stem .. "'")
	end
	return dereduced_stem
end


-- Return a true value if `slot` must not be filled for a noun restricted to `number`: "jed" (singular only)
-- excludes dual and plural slots, "dw" (dual only) excludes singular and plural slots, and "mn" (plural only)
-- excludes singular and dual slots. Any other number skips nothing.
local function skip_slot(number, slot)
	return number == "jed" and rfind(slot, "_[dp]$") or
		number == "dw" and rfind(slot, "_[sp]$") or
		number == "mn" and rfind(slot, "_[sd]$")
end


-- Basic function to combine stem(s) and ending(s) and insert the result into the appropriate slot. `stems` is either
-- the `stems` object passed into the declension functions (containing the various stems; see below) or a string to
-- override the stem. (NOTE: If you pass a string in as `stems`, you should pass the value of `stems.footnotes` as the
-- value of `footnotes` as it will be lost otherwise. If you need to supply your own footnote in addition, use
-- iut.combine_footnotes() to combine any user-specified footnote(s) with your footnote(s).)
-- `endings` is either a string specifying a single ending or a list of endings. If `endings` is nil, no forms are
-- inserted. If an ending is "-", the value of `stems` is ignored and the lemma is used instead as the stem; this is
-- important in case the user used `decllemma:` to specify a declension lemma different from the actual lemma, or
-- specified '.foreign' (which has a similar effect).
local function add(base, slot, stems, endings, footnotes)
	if not endings then
		return
	end
	-- Call skip_slot() based on the declined number; if the actual number is different, we correct this in
	-- decline_noun() at the end.
	if skip_slot(base.number, slot) then
		return
	end
	local stems_footnotes = type(stems) == "table" and stems.footnotes or nil
	footnotes = iut.combine_footnotes(iut.combine_footnotes(base.footnotes, stems_footnotes), footnotes)
	if type(endings) == "string" then
		endings = {endings}
	end
	for _, ending in ipairs(endings) do
		-- Compute the stem. If ending is "-", use the lemma regardless. Otherwise if `stems` is a string, use it.
		-- Otherwise `stems` is an object containing four stems (vowel-vs-non-vowel cross regular-vs-oblique);
		-- compute the appropriate stem based on the slot and whether the ending begins with a vowel.
		local stem
		if ending == "-" then
			stem = base.actual_lemma
			ending = ""
		elseif type(stems) == "string" then
			stem = stems
		else
			local is_vowel_ending = rfind(ending, "^" .. com.vowel_c)
			if stems.oblique_slots == "all" then
				if is_vowel_ending then
					stem = stems.oblique_vowel_stem
				else
					stem = stems.oblique_nonvowel_stem
				end
			elseif is_vowel_ending then
				stem = stems.vowel_stem
			else
				stem = stems.nonvowel_stem
			end
		end
		ending = iut.combine_form_and_footnotes(ending, footnotes)
		local function combine_stem_ending(stem, ending)
			return com.combine_stem_ending(base, slot, stem, ending)
		end
		iut.add_forms(base.forms, slot, stem, ending, combine_stem_ending)
	end
end


-- Apply the user-specified overrides in `base.overrides` to every slot for which `do_slot(slot)` returns true.
-- An overridden slot is recorded in `base.slot_overridden` and its existing forms are discarded first. Full
-- overrides insert the given form directly; ending overrides are combined with each stem set through add().
-- Throws an error if an override targets a slot excluded by the declined number.
local function process_slot_overrides(base, do_slot)
	for slot, overrides in pairs(base.overrides) do
		-- Call skip_slot() based on the declined number; if the actual number is different, we correct this in
		-- decline_noun() at the end.
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number ..
				"' number restriction")
		end
		if do_slot(slot) then
			base.slot_overridden[slot] = true
			base.forms[slot] = nil
			for _, override in ipairs(overrides) do
				for _, value in ipairs(override.values) do
					local form = value.form
					local combined_notes = iut.combine_footnotes(base.footnotes, value.footnotes)
					if override.full then
						if form ~= "" then
							iut.insert_form(base.forms, slot, {form = form, footnotes = combined_notes})
						end
					else
						-- Convert a null ending to "-" in the acc/voc sg slots so that e.g. [[Kerberos]] declared as
						-- <m.sg.foreign.gena:u.acc-:a> works correctly and generates accusative 'Kerberos/Kerbera' not
						-- #'Kerber/Kerbera'.
						if (slot == "acc_s" or slot == "voc_s") and form == "" then
							form = "-"
						end
						for _, stems in ipairs(base.stem_sets) do
							add(base, slot, stems, form, combined_notes)
						end
					end
				end
			end
		end
	end
end


-- Add the endings for all directly-specified slots of one declension. Each ending argument follows the
-- conventions of add() (string, list of strings, or nil/false for "no form"). The nominative singular is always
-- seeded from the lemma ("-"); the trailing `nom_s` argument can add further nominative singular endings.
-- `footnotes` is the LAST parameter, after `nom_s`.
local function add_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
	nom_d, gen_d, dat_d,
	nom_p, gen_p, dat_p, acc_p, loc_p, ins_p,
	nom_s, footnotes
)
	add(base, "nom_s", stems, "-", footnotes)
	add(base, "gen_s", stems, gen_s, footnotes)
	add(base, "dat_s", stems, dat_s, footnotes)
	add(base, "acc_s", stems, acc_s, footnotes)
	add(base, "voc_s", stems, voc_s, footnotes)
	add(base, "loc_s", stems, loc_s, footnotes)
	add(base, "ins_s", stems, ins_s, footnotes)
	add(base, "nom_d", stems, nom_d, footnotes)
	add(base, "gen_d", stems, gen_d, footnotes)
	add(base, "dat_d", stems, dat_d, footnotes)
	if base.number == "mn" then
		-- If this is a plurale tantum noun and we're processing the nominative plural, use the user-specified lemma
		-- rather than generating the plural from the synthesized singular, which may not match the specified lemma
		-- (e.g. [[tvargle]] "Olomouc cheese" using <m.pl.mixed> would try to generate 'tvargle/tvargly', and [[peníze]]
		-- "money" using <m.pl.#ě.genpl-> would try to generate 'peněze').
		local acc_p_like_nom = m_table.deepEquals(nom_p, acc_p)
		nom_p = "-"
		if acc_p_like_nom then
			acc_p = "-"
		end
	end
	add(base, "nom_p", stems, nom_p, footnotes)
	add(base, "gen_p", stems, gen_p, footnotes)
	add(base, "dat_p", stems, dat_p, footnotes)
	add(base, "acc_p", stems, acc_p, footnotes)
	add(base, "loc_p", stems, loc_p, footnotes)
	add(base, "ins_p", stems, ins_p, footnotes)
	add(base, "nom_s", stems, nom_s, footnotes)
end


-- Singular-only variant of add_decl(). Nine nils fill the dual and plural endings and a tenth fills the
-- trailing `nom_s` parameter, so that `footnotes` reaches add_decl()'s `footnotes` parameter.
local function add_sg_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, footnotes
)
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nil, nil, nil,
		nil, nil, nil, nil, nil, nil,
		nil, footnotes)
end


-- Dual-only variant of add_decl(). The nominative dual is taken from the lemma ("-"); singular, plural and the
-- trailing `nom_s` endings are nil so that `footnotes` reaches add_decl()'s `footnotes` parameter.
local function add_du_only_decl(base, stems,
	gen_d, dat_d, footnotes
)
	add_decl(base, stems, nil, nil, nil, nil, nil, nil,
		"-", gen_d, dat_d,
		nil, nil, nil, nil, nil, nil,
		nil, footnotes)
end


-- Plural-only variant of add_decl(). The nominative plural is taken from the lemma ("-"); singular, dual and the
-- trailing `nom_s` endings are nil so that `footnotes` reaches add_decl()'s `footnotes` parameter.
local function add_pl_only_decl(base, stems,
	gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	add_decl(base, stems, nil, nil, nil, nil, nil, nil,
		nil, nil, nil,
		"-", gen_p, dat_p, acc_p, loc_p, ins_p,
		nil, footnotes)
end


-- Apply overrides and fill in the slots that are derived from other slots (vocative plural, the accusatives and
-- the clitic accusative singular), then compute the `_linked` variants of the potential lemma slots.
local function handle_derived_slots_and_overrides(base)
	local function is_non_derived_slot(slot)
		return slot ~= "voc_p" and slot ~= "acc_s" and slot ~= "clitic_acc_s"
	end

	local function is_derived_slot(slot)
		return not is_non_derived_slot(slot)
	end

	base.slot_overridden = {}
	-- Handle overrides for the non-derived slots. Do this before generating the derived
	-- slots so overrides of the source slots (e.g. nom_p) propagate to the derived slots.
	process_slot_overrides(base, is_non_derived_slot)

	-- Generate the remaining slots that are derived from other slots.
	if not base.pron and not base.det then
		-- Pronouns don't have a vocative (singular or plural).
		iut.insert_forms(base.forms, "voc_p", base.forms.nom_p)
	end
	-- The accusative source depends on animacy ("wěc", "wos" or "zwj"). If the animacy is none of these, the
	-- and/or chain yields false, the lookup yields nil and iut.insert_forms() receives nil as its forms argument.
	if not base.forms.acc_s and not base.slot_overridden.acc_s then
		iut.insert_forms(base.forms, "acc_s", base.forms[base.animacy == "wěc" and "nom_s" or base.animacy == "wos" and "gen_s" or base.animacy == "zwj" and "gen_s"])
	end
	if not base.forms.acc_d and not base.slot_overridden.acc_d then
		iut.insert_forms(base.forms, "acc_d", base.forms[base.animacy == "wěc" and "nom_d" or base.animacy == "wos" and "gen_d" or base.animacy == "zwj" and "nom_d"])
	end
	if not base.forms.acc_p and not base.slot_overridden.acc_p then
		iut.insert_forms(base.forms, "acc_p", base.forms[base.animacy == "wěc" and "nom_p" or base.animacy == "wos" and "gen_p" or base.animacy == "zwj" and "nom_p"])
	end
	if not base.forms.clitic_acc_s and not base.slot_overridden.clitic_acc_s then
		iut.insert_forms(base.forms, "clitic_acc_s", base.forms[base.animacy == "wěc" and "nom_s" or "clitic_gen_s"])
	end

	-- Handle overrides for derived slots, to allow them to be overridden.
	process_slot_overrides(base, is_derived_slot)

	-- Compute linked versions of potential lemma slots, for use in {{hsb-noun}}.
	-- We substitute the original lemma (before removing links) for forms that
	-- are the same as the lemma, if the original lemma has links.
	for _, slot in ipairs(potential_lemma_slots) do
		iut.insert_forms(base.forms, slot .. "_linked", iut.map_forms(base.forms[slot], function(form)
			if form == base.orig_lemma_no_links and rfind(base.orig_lemma, "%[%[") then
				return base.orig_lemma
			else
				return form
			end
		end))
	end
end


-- Table mapping declension types to functions to decline the noun. The function takes two arguments, `base` and
-- `stems`; the latter specifies the computed stems (vowel vs. non-vowel, singular vs. plural) and whether the noun
-- is reducible and/or has vowel alternations in the stem. Most of the specifics of determining which stem to use
-- and how to modify it for the given ending are handled in add_decl(); the declension functions just need to generate
-- the appropriate endings.
local decls = {}
-- Table specifying additional properties for declension types. Every declension type must have such a table, which
-- specifies which category or categories to add and what annotation to show in the title bar of the declension table.
--
-- * Only the `cat` property of this table is mandatory; there is also a `desc` property to specify the annotation, but
--   this can be omitted and the annotation will then be computed from the `cat` property. The `cat` property is either
--   a string, a list of strings or a function (of two arguments, `base` and `stems` as above) returning a string or
--   list of strings. The string can contain the keywords GENDER to substitute the gender (and animacy for masculine
--   nouns) and POS (to substitute the pluralized part of speech). The keyword GENPOS is equivalent to 'GENDER POS'. If
--   no keyword is present, ' GENPOS' is added onto the end. If only GENDER is present, ' POS' is added onto the end.
--   In all cases, the language name is added onto the beginning to form the full category name.
-- * The `desc` property is of the same form as the `cat` property and specifies the annotation to display in the title
--   bar (which may have the same format as the category minus the part of speech, or may be abbreviated). The value
--   may not be a list of strings, as only one annotation is displayed. If omitted, it is derived from the category
--   spec(s) by taking the last category (if more than one is given) and removing ' POS' before keyword substitution.
local declprops = {} decls["hard-m"] = function(base, stems) local gen_s = base.animacy == "in" and {"a", "u"} or "a" local nom_p = base.animacy == "wos" and "ojo" or "y" local voc_s = not rmatch(base.lemma, ".*tr$") and "o" add_decl(base, stems, gen_s, "ej", acc_s, voc_s, "u", "om", "aj", "ow", "omaj", nom_p, "ow", "am", nil, "ach", "ami") add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)), nil, nil, nil, "e", "e") end declprops["hard-m"] = { desc = function(base, stems) return "muski twjerdy zdónk" end, cat = function(base, stems) return "muski twjerdy zdónk" end } decls["soft-m"] = function(base, stems) local gen_s = base.animacy == "in" and {"a", "u"} or "a" local nom_p = base.animacy == "wos" and "ojo" or "e" add_decl(base, com.addj(stems.oblique_vowel_stem), gen_s, "ej", acc_s, "o", "u", "om", "ej", "ow", "omaj", nom_p, "ow", "am", nil, "ach", "emi") end declprops["soft-m"] = { desc = function(base, stems) return "muski mjechki zdónk" end, cat = function(base, stems) return "muski mjechki zdónk" end } decls["czs-m"] = function(base, stems) local gen_s = base.animacy == "in" and {"a", "u"} or "a" local nom_p = base.animacy == "wos" and "ojo" or "y" add_decl(base, stems, gen_s, "ej", acc_s, "o", "u", "om", "aj", "ow", "omaj", nom_p, "ow", "am", nil, "ach", "ami") end declprops["czs-m"] = { desc = function(base, stems) return "muski twjerdy syčacy zdónk" end, cat = function(base, stems) return "muski twjerdy zdónk" end } decls["velar-m"] = function(base, stems) local gen_s = base.animacy == "in" and {"a", "u"} or "a" local nom_p = base.animacy == "wos" and "ojo" or "i" add_decl(base, stems, gen_s, "ej", acc_s, "o", "u", "om", "aj", "ow", "omaj", nom_p, "ow", "am", nil, "ach", "ami") add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)), nil, nil, nil, nil, "e") end declprops["velar-m"] = { desc = function(base, stems) return "masculine velar stem" end, cat = 
function(base, stems) return "masculine velar stem" end } decls["adje-m"] = function(base, stems) if rmatch(base.lemma, "^.*" .. com.velar_c .. "i$") then add_decl(base, stems, "eho", "emu", nil, "-", "im", "im", "aj", "eju", "imaj", nom_p, "ich", "im", nil, "ich", "imi") if base.animacy == "wos" then add_decl(base, com.apply_palatalization(stems.oblique_vowel_stem), nil, nil, nil, nil, nil, nil, nil, nil, nil, "y") else add_decl(base, stems, nil, nil, nil, nil, nil, nil, nil, nil, nil, "e") end elseif rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "i$") then local nom_p = base.animacy == "wos" and "i" or "e" add_decl(base, stems, "eho", "emu", nil, "-", "im", "im", "ej", "eju", "imaj", nom_p, "ich", "im", nil, "ich", "imi") elseif rmatch(base.lemma, "^.*[czs]e$") then local nom_p = base.animacy == "wos" and "y" or "e" add_decl(base, stems, "eho", "emu", nil, "-", "ym", "ym", "aj", "eju", "ymaj", nom_p, "ych", "ym", nil, "ych", "ymi") else add_decl(base, stems, "eho", "emu", nil, "-", "ym", "ym", "aj", "eju", "ymaj", nom_p, "ych", "ym", nil, "ych", "ymi") if base.animacy == "wos" then add_decl(base, com.apply_palatalization(stems.oblique_vowel_stem), nil, nil, nil, nil, nil, nil, nil, nil, nil, "i") else add_decl(base, stems, nil, nil, nil, nil, nil, nil, nil, nil, nil, "e") end end end declprops["adje-m"] = { desc = function(base, stems) return "" end, cat = function(base, stems) return "masculine adjectival" end } decls["hard-f"] = function(base, stems) add_decl(base, stems, "y", nil, "u", "-", nil, "u", nil, "ow", "omaj", "y", "ow", "am", "y", "ach", "ami") add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)), nil, "e", nil, nil, "e", nil, "e") end declprops["hard-f"] = { desc = function(base, stems) return "žónski twjerdy zdónk" end, cat = function(base, stems) return "žónski twjerdy zdónk" end } decls["soft-f"] = function(base, stems) if rmatch(base.lemma, "^(.*" .. com.cons_c .. 
")$") then add_decl(base, com.addj(stems.oblique_vowel_stem), "e", nil, "-", "-", nil, "u", nil, "ow", "omaj", "e", "ow", "am", "e", "ach", "emi") else add_decl(base, stems, "e", nil, "u", "-", nil, "u", nil, "ow", "omaj", "e", "ow", "am", "e", "ach", "emi") end add_decl(base, com.removej(com.addj(stems.oblique_vowel_stem)), nil, "i", nil, nil, "i", nil, "i", nil, nil, nil, "i") end declprops["soft-f"] = { desc = function(base, stems) return "žónski mjechki zdónk" end, cat = function(base, stems) return "žónski mjechki zdónk" end } decls["czs-f"] = function(base, stems) if rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$") then add_decl(base, stems, "y", "y", "-", "-", "y", "u", "y", "ow", "omaj", "y", "ow", "am", "y", "ach", "ami") else add_decl(base, stems, "y", "y", "u", "-", "y", "u", "y", "ow", "omaj", "y", "ow", "am", "y", "ach", "ami") end end declprops["czs-f"] = { desc = function(base, stems) return "žónski twjerdy syčacy zdónk" end, cat = function(base, stems) return "žónski twjerdy zdónk" end } decls["velar-f"] = function(base, stems) add_decl(base, stems, "i", nil, "u", "-", nil, "u", nil, "ow", "omaj", "i", "ow", "am", "i", "ach", "ami") add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)), nil, "e", nil, nil, "e", nil, "e") end declprops["velar-f"] = { desc = function(base, stems) return "feminine velar stem" end, cat = function(base, stems) return "feminine velar stem" end } decls["v-f"] = function(base, stems) add_decl(base, stems, "wje", "wi", "-", "-", "wi", "wju", "wi", "wjow", "wjomaj", "wje", "wjow", "wjam", "wje", "wjach", "wjemi") end declprops["v-f"] = { desc = function(base, stems) return "feminine v-stem" end, cat = function(base, stems) return "feminine v-stem" end } decls["adje-f"] = function(base, stems) if rmatch(base.lemma, "^.*" .. com.velar_c .. "a$") or rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. 
"a$") then add_decl(base, stems, "eje", "ej", "u", "-", "ej", "ej", "ej", "eju", "imaj", "e", "ich", "im", "e", "ich", "imi") else add_decl(base, stems, "eje", "ej", "u", "-", "ej", "ej", "ej", "eju", "ymaj", "e", "ych", "ym", "e", "ych", "ymi") end end declprops["adje-f"] = { desc = function(base, stems) return "" end, cat = function(base, stems) return "feminine adjectival" end } decls["hard-n"] = function(base, stems) add_decl(base, stems, "a", "u", "-", "-", nil, "om", nil, "ow", "omaj", "a", "ow", "am", "a", "ach", "ami") add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)), nil, nil, nil, nil, "e", nil, "e") end declprops["hard-n"] = { desc = function(base, stems) return "ničeji twjerdy zdónk" end, cat = function(base, stems) return "ničeji twjerdy zdónk" end } decls["soft-n"] = function(base, stems) add_decl(base, stems, "a", "u", "-", "-", "u", "om", nil, "ow", "omaj", "a", "ow", "am", "a", "ach", "emi") add_decl(base, com.removej(stems.oblique_vowel_stem), nil, nil, nil, nil, nil, nil, "i") end declprops["soft-n"] = { desc = function(base, stems) return "ničeji mjechki zdónk" end, cat = function(base, stems) return "ničeji mjechki zdónk" end } decls["czs-n"] = function(base, stems) add_decl(base, stems, "a", "u", "-", "-", "u", "om", "y", "ow", "omaj", "a", "ow", "am", "a", "ach", "ami") end declprops["czs-n"] = { desc = function(base, stems) return "ničeji twjerdy syčacy zdónk" end, cat = function(base, stems) return "ničeji twjerdy zdónk" end } decls["velar-n"] = function(base, stems) add_decl(base, stems, "a", "u", "-", "-", "u", "om", nil, "ow", "omaj", "a", "ow", "am", "a", "ach", "ami") add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)), nil, nil, nil, nil, "e", nil, "e") end declprops["velar-n"] = { desc = function(base, stems) return "neuter velar stem" end, cat = function(base, stems) return "neuter velar stem" end } decls["adje-n"] = 
function(base, stems)
	-- Neuter nouns with adjectival endings (lemma in -e). Lemmas whose final consonant (before -e) is velar or
	-- inherently soft take "i" in the endings where all other lemmas take "y".
	if rmatch(base.lemma, "^.*" .. com.velar_c .. "e$") or rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "e$") then
		add_decl(base, stems, "eho", "emu", "-", "-", "im", "im", "ej", "eju", "imaj", "e", "ich", "im", "e", "ich", "imi")
	else
		-- The first three endings must match the soft/velar branch above ("eho", "emu", "-"). This branch used
		-- to pass "eje", "ej", "u", which are the endings decls["adje-f"] uses for feminines.
		add_decl(base, stems, "eho", "emu", "-", "-", "ym", "ym", "ej", "eju", "ymaj", "e", "ych", "ym", "e", "ych", "ymi")
	end
end

-- The description is deliberately empty; only the category is set.
declprops["adje-n"] = {
	desc = function(base, stems) return "" end,
	cat = function(base, stems) return "neuter adjectival" end,
}

-- Neuter t-stems: endings built on "eć-" and "at-" as listed.
decls["tstem-n"] = function(base, stems)
	add_decl(base, stems, "eća", "eću", "-", "-", "eću", "ećom", "eći", "ećow", "ećomaj", "ata", "atow", "atam", "ata", "atach", "atami")
end

declprops["tstem-n"] = {
	desc = function(base, stems) return "neuter t-stem" end,
	cat = function(base, stems) return "neuter t-stem" end,
}

-- Neuter n-stems: every ending is built on "en(j)-".
decls["nstem-n"] = function(base, stems)
	add_decl(base, stems, "enja", "enju", "-", "-", "enju", "enjom", "eni", "enjow", "enjomaj", "enja", "enjow", "enjam", "enja", "enjach", "enjami")
end

declprops["nstem-n"] = {
	desc = function(base, stems) return "neuter n-stem" end,
	cat = function(base, stems) return "neuter n-stem" end,
}

-- Adjectival declension: generate the full adjective paradigm with the adjective module and copy the forms
-- matching the noun's gender and number into the noun's slots.
decls["adj"] = function(base, stems)
	-- `props` is currently always empty, so `propspec` is always "" and the lemma is passed through unchanged.
	local props = {}
	local propspec = table.concat(props, ".")
	if propspec ~= "" then
		propspec = "<" .. propspec .. ">"
	end
	local adj_alternant_multiword_spec = require("Modul:zlw-ocs-adjective").do_generate_forms({base.lemma .. propspec})
	-- Copy one adjective slot into one noun slot.
	local function copy(from_slot, to_slot)
		base.forms[to_slot] = adj_alternant_multiword_spec.forms[from_slot]
	end
	if base.number ~= "mn" then
		if base.gender == "m" then
			copy("nom_m", "nom_s")
			copy("gen_mn", "gen_s")
			copy("dat_mn", "dat_s")
			copy("loc_mn", "loc_s")
			copy("ins_mn", "ins_s")
		elseif base.gender == "ž" then
			copy("nom_f", "nom_s")
			copy("gen_f", "gen_s")
			copy("dat_f", "dat_s")
			copy("acc_f", "acc_s")
			copy("loc_f", "loc_s")
			copy("ins_f", "ins_s")
		else
			copy("nom_n", "nom_s")
			copy("gen_mn", "gen_s")
			copy("dat_mn", "dat_s")
			copy("acc_n", "acc_s")
			copy("loc_mn", "loc_s")
			copy("ins_mn", "ins_s")
		end
		-- Default the vocative singular to the nominative singular when nothing set it.
		if not base.forms.voc_s then
			iut.insert_forms(base.forms, "voc_s", base.forms.nom_s)
		end
	end
	if base.number ~= "jed" then
		if base.gender == "m" then
			copy("nom_mp", "nom_p")
			copy("acc_mfp", "acc_p")
			copy("nom_md", "nom_d")
		elseif base.gender == "ž" then
			copy("nom_fp", "nom_p")
			copy("acc_mfp", "acc_p")
			copy("nom_fnd", "nom_d")
		else
			copy("nom_np", "nom_p")
			copy("acc_np", "acc_p")
			copy("nom_fnd", "nom_d")
		end
		copy("gen_p", "gen_p")
		copy("dat_p", "dat_p")
		copy("ins_p", "ins_p")
		copy("loc_p", "loc_p")
		copy("gen_d", "gen_d")
		copy("dat_d", "dat_d")
	end
end

-- Classify an adjectival lemma as "hard" (ends in ý), "soft" (ends in í) or "possessive" (anything else).
-- NOTE(review): these endings look inherited from another language's module; confirm they suit this wiki's lemmas.
local function get_stemtype(base)
	if rfind(base.lemma, "ý$") then
		return "hard"
	elseif rfind(base.lemma, "í$") then
		return "soft"
	else
		return "possessive"
	end
end

declprops["adj"] = {
	cat = function(base, stems)
		return {"adjectival POS", get_stemtype(base) .. " GENDER adjectival POS"}
	end,
}

decls["indecl"] = function(base, stems)
	-- Indeclinable. Note that fully indeclinable nouns should not have a table at all rather than one all of whose forms
	-- are the same; but having an indeclinable declension is useful for nouns that may or may not be indeclinable, e.g.
	-- [[desatero]] "group of ten" or the plural of [[peso]], which may be indeclinable 'pesos'.
add_decl(base, stems, "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-")
end

-- Categories for indeclinables; the set depends on whether the word was flagged as adjectival.
declprops["indecl"] = {
	cat = function(base, stems)
		if base.adj then
			return {"adjectival POS", "indeclinable adjectival POS", "indeclinable GENDER adjectival POS"}
		else
			return {"indeclinable POS", "indeclinable GENPOS"}
		end
	end,
}

decls["manual"] = function(base, stems)
	-- Anything declined manually using overrides. We don't set any declensions except the nom_s (or nom_p if plurale
	-- tantum).
	add(base, base.number == "mn" and "nom_p" or "nom_s", stems, "-")
end

declprops["manual"] = {
	desc = "GENDER",
	cat = {},
}

-- Fill in gender/number/animacy for the two recognized pronoun lemmas and reject user-specified values.
-- Raises an error for any other lemma, before any field of `base` has been changed.
local function set_pron_defaults(base)
	if base.gender or base.lemma ~= "ona" and base.number or base.animacy then
		error("Can't specify gender, number or animacy for pronouns")
	end

	-- The two pronouns differ only in animacy.
	local animacy
	if base.lemma == "štó" then
		animacy = "wos"
	elseif base.lemma == "što" then
		animacy = "wěc"
	else
		error(("Unrecognized pronoun '%s'"):format(base.lemma))
	end

	base.gender = "none"
	base.actual_gender = "none"
	base.number = "jed"
	base.actual_number = "jed"
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = false
end

-- Pronouns are declined from full-form endings, so the stems are empty strings.
local function determine_pronoun_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with pronouns")
	end
	base.decl = "pron"
	base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
end

-- Full forms for the two supported pronouns; the stems are empty (see determine_pronoun_stems()).
decls["pron"] = function(base, stems)
	local lemma = base.lemma
	if lemma == "štó" then
		add_decl(base, stems, "koho", "komu", nil, nil, "kim", "kim")
	elseif lemma == "što" then
		add_decl(base, stems, "čeho", "čemu", nil, nil, "čim", "čim")
	else
		error(("Internal error: Unrecognized pronoun lemma '%s'"):format(lemma))
	end
end

declprops["pron"] = {
	desc = "GENDER pronoun",
	cat = {},
}

-- Numerals: gender and animacy may not be user-specified; all are treated as genderless plurals ("mn").
local function set_num_defaults(base)
	if base.gender or base.animacy then
		error("Can't specify gender, number or animacy for numeral")
	end
	base.gender = "none"
	base.actual_gender = "none"
	base.number = "mn"
	base.actual_number = "mn"
	base.animacy = "none"
	base.actual_animacy = "none"
	base.has_clitic = false
end

-- Determiners: nothing may be user-specified; gender, number and animacy are all set to "none".
local function set_det_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for determiner")
	end
	base.gender = "none"
	base.actual_gender = "none"
	base.number = "none"
	base.actual_number = "none"
	base.animacy = "none"
	base.actual_animacy = "none"
	base.has_clitic = false
end

-- The determiner stem is the lemma minus one final vowel, or the whole lemma if it does not end in a vowel.
local function determine_determiner_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with determiners")
	end
	local stem = rmatch(base.lemma, "^(.*)" .. com.vowel_c .. "$") or base.lemma
	base.decl = "det"
	base.stem_sets = {{reducible = false, vowel_stem = stem, nonvowel_stem = stem}}
end

decls["det"] = function(base, stems)
	add_sg_decl(base, stems, "a", "a", "-", nil, "a", "a")
end

declprops["det"] = {
	desc = "GENDER determiner",
	cat = {},
}

-- Collect the footnotes from a group of alternating text/footnote runs. The even-indexed elements are the
-- footnotes; the text following each footnote must be empty. Returns a list, or nil if there are none.
local function fetch_footnotes(separated_group)
	local collected
	for idx = 2, #separated_group - 1, 2 do
		if separated_group[idx + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		collected = collected or {}
		table.insert(collected, separated_group[idx])
	end
	return collected
end

-- Parse a slot override such as CASE, CASEpl or CASEdu, with several slots joined by "+", optionally followed
-- by ":" (sets `full`), and then one or more colon-separated values, each with optional footnotes.
-- Returns the list of slot names and an object {full = true or nil, values = {{form = ..., footnotes = ...}, ...}}.
local function parse_override(segments)
	local retval = {values = {}}
	local slots = {}
	local part = segments[1]

	-- Peel off one CASE[pl|du] prefix per iteration; a following "+" introduces another slot.
	local more = true
	while more do
		local case = usub(part, 1, 3)
		if not cases[case] then
			error(("Unrecognized case '%s' in override: '%s'"):format(case, table.concat(segments)))
		end
		part = usub(part, 4)
		local suffix = "_s"
		if rfind(part, "^pl") then
			part = usub(part, 3)
			suffix = "_p"
		elseif rfind(part, "^du") then
			part = usub(part, 3)
			suffix = "_d"
		end
		table.insert(slots, case .. suffix)
		more = rfind(part, "^%+")
		if more then
			part = usub(part, 2)
		end
	end

	if rfind(part, "^:") then
		retval.full = true
		part = usub(part, 2)
	end
	segments[1] = part

	local colon_separated_groups = iut.split_alternating_runs_and_strip_spaces(segments, ":")
	for _, group in ipairs(colon_separated_groups) do
		local form = group[1]
		if form == "" then
			error(("Use - to indicate an empty ending for slot%s '%s': '%s'"):format(
				#slots > 1 and "s" or "", table.concat(slots), table.concat(segments)))
		end
		-- "-" stands for an explicitly empty ending.
		if form == "-" then
			form = ""
		end
		table.insert(retval.values, {form = form, footnotes = fetch_footnotes(group)})
	end
	return slots, retval
end

--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more
dot-separated indicators within them). Return value is an object of the form

{
  overrides = {
	SLOT = {OVERRIDE, OVERRIDE, ...}, -- as returned by parse_override()
	...
},
  forms = {}, -- forms for a single spec alternant; see `forms` below
  footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
  stem_sets = { -- may be missing
	{
	  reducible = TRUE_OR_FALSE,
	  vowelalt = "quant" or "quant-ě", -- may be missing
	  footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
	  -- The following fields are filled in by determine_stems()
	  vowel_stem = "STEM",
	  nonvowel_stem = "STEM",
	  oblique_slots = "all",
	  oblique_vowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
	  oblique_nonvowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
	},
	...
  },
  gender = "GENDER", -- "m", "ž", "n"
  number = "NUMBER", -- "jed", "dw", "mn"; may be missing
  animacy = "ANIMACY", -- "wos", "zwj", "wěc"; may be missing
  hard = true, -- may be missing
  soft = true, -- may be missing
  mixed = true, -- may be missing
  surname = true, -- may be missing
  istem = true, -- may be missing
  ["-istem"] = true, -- may be missing
  tstem = true, -- may be missing
  nstem = true, -- may be missing
  tech = true, -- may be missing
  foreign = true, -- may be missing
  mostlyindecl = true, -- may be missing
  indecl = true, -- may be missing
  manual = true, -- may be missing
  adj = true, -- may be missing
  decllemma = "DECLENSION-LEMMA", -- may be missing
  declgender = "DECLENSION-GENDER", -- may be missing
  declnumber = "DECLENSION-NUMBER", -- may be missing

  -- The following additional fields are added by other functions:
  orig_lemma = "ORIGINAL-LEMMA", -- as given by the user
  orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
  lemma = "LEMMA", -- `orig_lemma_no_links`, converted to singular form if plural and lowercase if all-uppercase
  forms = {
	SLOT = {
	  {
		form = "FORM",
		footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
	  },
	  ...
	},
	...
  },
  decl = "DECL", -- declension, e.g. "hard-m"
  vowel_stem = "VOWEL-STEM", -- derived from vowel-ending lemmas
  nonvowel_stem = "NONVOWEL-STEM", -- derived from non-vowel-ending lemmas
}
]=]
local function parse_indicator_spec(angle_bracket_spec)
	local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
	assert(inside)
	local base = {overrides = {}, forms = {}}
	if inside ~= "" then
		local segments = iut.parse_balanced_segment_run(inside, "[", "]")
		local dot_separated_groups = iut.split_alternating_runs_and_strip_spaces(segments, "%.")
		for i, dot_separated_group in ipairs(dot_separated_groups) do
			local part = dot_separated_group[1]
			local case_prefix = usub(part, 1, 3)
			if cases[case_prefix] then
				-- Slot override; one override may cover several slots.
				local slots, override = parse_override(dot_separated_group)
				for _, slot in ipairs(slots) do
					if base.overrides[slot] then
						error(("Two overrides specified for slot '%s'"):format(slot))
					else
						base.overrides[slot] = {override}
					end
				end
			elseif part == "" then
				-- Footnotes by themselves, applying to the whole spec.
				if #dot_separated_group == 1 then
					error("Blank indicator: '" .. inside .. "'")
				end
				base.footnotes = fetch_footnotes(dot_separated_group)
			elseif rfind(part, "^[-*#ě]*$") or rfind(part, "^[-*#ě]*,") then
				-- Comma-separated reducible/vowel-alternation patterns; each one becomes a stem set.
				if base.stem_sets then
					error("Can't specify reducible/vowel-alternant indicator twice: '" .. inside .. "'")
				end
				local comma_separated_groups = iut.split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
				local stem_sets = {}
				for i, comma_separated_group in ipairs(comma_separated_groups) do
					local pattern = comma_separated_group[1]
					local reducible, vowelalt, oblique_slots
					if pattern == "-" then
						-- default reducible, no vowel alt
					else
						-- Split off "*" (reducible) or "-*" (not reducible); what remains is the vowel-alternation
						-- pattern.
						local before, after
						before, reducible, after = rmatch(pattern, "^(.-)(%-?%*)(.-)$")
						if before then
							pattern = before .. after
							reducible = reducible == "*"
						end
						if pattern ~= "" then
							if not rfind(pattern, "^##?ě?$") then
								error("Unrecognized vowel-alternation pattern '" .. pattern .. "', should be one of #, ##, #ě or ##ě: '" .. inside .. "'")
							end
							if pattern == "#ě" or pattern == "##ě" then
								vowelalt = "quant-ě"
							else
								vowelalt = "quant"
							end
							-- `oblique_slots` will be later changed to "all" if the lemma ends in a consonant.
							oblique_slots = "all"
						end
					end
					table.insert(stem_sets, {
						reducible = reducible,
						vowelalt = vowelalt,
						oblique_slots = oblique_slots,
						footnotes = fetch_footnotes(comma_separated_group)
					})
				end
				base.stem_sets = stem_sets
			elseif #dot_separated_group > 1 then
				error("Footnotes only allowed with slot overrides, reducible or vowel alternation specs or by themselves: '" .. table.concat(dot_separated_group) .. "'")
			elseif part == "m" or part == "ž" or part == "n" then
				if base.gender then
					error("Can't specify gender twice: '" .. inside .. "'")
				end
				base.gender = part
			elseif part == "jed" or part == "dw" or part == "mn" then
				if base.number then
					error("Can't specify number twice: '" .. inside .. "'")
				end
				base.number = part
			elseif part == "wos" or part == "zwj" or part == "wěc" then
				if base.animacy then
					error("Can't specify animacy twice: '" .. inside .. "'")
				end
				base.animacy = part
			elseif part == "hard" or part == "soft" or part == "istem" or part == "tstem" or part == "nstem" or
				part == "indecl" or part == "pron" or part == "det" or part == "velar" or part == "vstem" or
				part == "adje" then
				-- Simple boolean flags, stored under their own name.
				if base[part] then
					error("Can't specify '" .. part .. "' twice: '" .. inside .. "'")
				end
				base[part] = true
			elseif part == "+" then
				if base.adj then
					error("Can't specify '+' twice: '" .. inside .. "'")
				end
				base.adj = true
			elseif part == "!" then
				if base.manual then
					error("Can't specify '!' twice: '" .. inside .. "'")
				end
				base.manual = true
			elseif rfind(part, "^mixedistem:") then
				if base.mixedistem then
					error("Can't specify 'mixedistem:' twice: '" .. inside .. "'")
				end
				base.mixedistem = rsub(part, "^mixedistem:", "")
			elseif rfind(part, "^decllemma:") then
				if base.decllemma then
					error("Can't specify 'decllemma:' twice: '" .. inside .. "'")
				end
				base.decllemma = rsub(part, "^decllemma:", "")
			elseif rfind(part, "^declgender:") then
				if base.declgender then
					error("Can't specify 'declgender:' twice: '" .. inside .. "'")
				end
				base.declgender = rsub(part, "^declgender:", "")
			elseif rfind(part, "^declnumber:") then
				if base.declnumber then
					error("Can't specify 'declnumber:' twice: '" .. inside .. "'")
				end
				base.declnumber = rsub(part, "^declnumber:", "")
			else
				error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
			end
		end
	end
	return base
end

-- A "regular noun" is anything not flagged as adjectival, pronoun, determiner or numeral.
local function is_regular_noun(base)
	return not base.adj and not base.pron and not base.det and not base.num
end

-- Remember the user-visible number in `actual_number`, then let 'declnumber:' override the number used for
-- declension. Raises an error if 'declnumber:' is not one of the accepted number codes.
local function process_declnumber(base)
	base.actual_number = base.number
	if base.declnumber then
		if base.declnumber == "jed" or base.declnumber == "dw" or base.declnumber == "mn" then
			base.number = base.declnumber
		else
			-- The message lists the values this function actually accepts.
			error(("Unrecognized value '%s' for 'declnumber', should be 'jed', 'dw' or 'mn'"):format(base.declnumber))
		end
	end
end

-- Fill in default gender/number/animacy for one word spec and reject inconsistent indicator combinations.
local function set_defaults_and_check_bad_indicators(base)
	-- Set default values.
	local regular_noun = is_regular_noun(base)
	if base.pron then
		set_pron_defaults(base)
	elseif base.det then
		set_det_defaults(base)
	elseif base.num then
		set_num_defaults(base)
	elseif not base.adj then
		if not base.gender then
			if base.manual then
				base.gender = "none"
			else
				error("For nouns, gender must be specified")
			end
		end
		base.number = base.number or "allthree"
		process_declnumber(base)
		base.animacy = base.animacy or "wěc"
		-- Record the user-visible gender/animacy before 'declgender:' changes the ones used for declension.
		base.actual_gender = base.gender
		base.actual_animacy = base.animacy
		if base.declgender then
			if base.declgender == "m-an" then
				base.gender = "m"
				base.animacy = "wos"
			elseif base.declgender == "m-in" then
				base.gender = "m"
				base.animacy = "wěc"
			elseif base.declgender == "ž" or base.declgender == "n" then
				base.gender = base.declgender
			else
				-- The message lists the values actually accepted above ('ž', not 'f').
				error(("Unrecognized value '%s' for 'declgender', should be 'm-an', 'm-in', 'ž' or 'n'"):format(base.declgender))
			end
		end
	end

	-- Check for bad indicator combinations.
	if (base.hard and 1 or 0) + (base.soft and 1 or 0) > 1 then
		error("At most one of 'hard' or 'soft' can be specified")
	end
	if base.istem and base["-istem"] then
		error("'istem' and '-istem' cannot be specified together")
	end
	if (base.istem or base["-istem"]) then
		if not regular_noun then
			error("'istem' and '-istem' can only be specified with regular nouns")
		end
	end
	if base.declgender and not regular_noun then
		error("'declgender' can only be specified with regular nouns")
	end
end

-- Apply set_defaults_and_check_bad_indicators() to every word spec and record on the top-level spec which
-- kinds of words (pronoun, determiner, numeral, and their complements) were seen.
local function set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
	local is_multiword = #alternant_multiword_spec.alternant_or_word_specs > 1
	iut.map_word_specs(alternant_multiword_spec, function(base)
		set_defaults_and_check_bad_indicators(base)
		base.multiword = is_multiword -- FIXME: not currently used; consider deleting
		alternant_multiword_spec.has_clitic = alternant_multiword_spec.has_clitic or base.has_clitic
		if base.pron then
			alternant_multiword_spec.saw_pron = true
		else
			alternant_multiword_spec.saw_non_pron = true
		end
		if base.det then
			alternant_multiword_spec.saw_det = true
		else
			alternant_multiword_spec.saw_non_det = true
		end
		if base.num then
			alternant_multiword_spec.saw_num = true
		else
			alternant_multiword_spec.saw_non_num = true
		end
	end)
end

-- Replace a word-final palatalized consonant (cluster) with its presumed unpalatalized source. The first
-- matching rule wins; `word` is returned unchanged if none matches. The št/čt rules only apply when
-- `is_adjective` is truthy. `base` is not used.
local function undo_second_palatalization(base, word, is_adjective)
	local function try(from, to)
		local stem = rmatch(word, "^(.*)" .. from .. "$")
		if stem then
			return stem .. to
		end
		return nil
	end
	return is_adjective and try("št", "sk") or
		is_adjective and try("čt", "ck") or
		try("c", "k") or -- FIXME, this could be wrong and c correct
		try("ř", "r") or
		try("z", "h") or -- FIXME, this could be wrong and z or g correct
		try("š", "ch") or
		word
end

-- For a plural-only lemma, synthesize a likely singular lemma. It doesn't have to be
-- theoretically correct as long as it generates all the correct plural forms.
local function synthesize_singular_lemma(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	local lemma_determined
	-- Loop over all stem sets in case the user specified multiple ones (e.g. '*,-*'). If we try to reconstruct
	-- different lemmas for different stem sets, we'll throw an error below.
	for _, stems in ipairs(base.stem_sets) do
		local stem, lemma
		-- Single-pass loop, used only so that `break` can leave the chain of tests early.
		while true do
			if base.indecl then
				-- If specified as indeclinable, leave it alone; e.g. 'pesos' indeclinable plural of [[peso]].
				lemma = base.lemma
				break
			elseif base.gender == "m" then
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					if base.soft then
						-- [[Blíženci]] "Gemini"
						-- Since the nominative singular has no ending.
						-- (This call used to pass an undeclared global `ending`, which is always nil, as a second
						-- argument; dropping it passes the same value without reading a global.)
						lemma = com.convert_paired_plain_to_palatal(stem)
					else
						lemma = undo_second_palatalization(base, stem)
					end
				else
					stem = rmatch(base.lemma, "^(.*)ové$") or rmatch(base.lemma, "^(.*)[éyě]$") or rmatch(base.lemma, "^(.*)ie$")
					if stem then
						-- [[manželé]] "married couple", [[Velšané]] "Welsh people"
						lemma = stem
					else
						error(("Masculine plural-only lemma '%s' should end in -i, -ové or -é"):format(base.lemma))
					end
				end
				-- NOTE(review): dereduce() only runs when reducibility was left unspecified and has just been
				-- defaulted to true; an explicitly reducible stem set is not dereduced here. Confirm this is intended.
				if stems.reducible == nil then
					if rfind(lemma, com.cons_c .. "[ck]$") and not com.is_monosyllabic(base.lemma) then
						stems.reducible = true
					end
					if stems.reducible then
						lemma = dereduce(base, lemma)
					end
				end
				break
			elseif base.gender == "ž" then
				stem = rmatch(base.lemma, "^(.*)y$")
				if stem then
					lemma = stem .. "a"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eě]$")
				if stem then
					-- Singular like the plural. Cons-stem feminines like [[dlaň]] "palm (of the hand)" have identical
					-- plurals to soft-stem feminines like [[růže]] (modulo e/ě differences), so we don't need to
					-- reconstruct the former type.
					lemma = base.lemma
					break
				end
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					-- i-stems.
					lemma = stem
					base.istem = true
					break
				end
				error(("Feminine plural-only lemma '%s' should end in -y, -ě, -e or -i"):format(base.lemma))
			elseif base.gender == "n" then
				-- -ata nouns like [[slůně]] "baby elephant" nom pl 'slůňata' are declined in the plural same as if
				-- the singular were 'slůňato' so we don't have to worry about them.
				stem = rmatch(base.lemma, "^(.*)a$")
				if stem then
					lemma = stem .. "o"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eěí]$")
				if stem then
					-- singular lemma also in -e, -ě or -í; e.g. [[věčná loviště]] "[[happy hunting ground]]"
					lemma = base.lemma
					break
				end
				error(("Neuter plural-only lemma '%s' should end in -a, -í, -ě or -e"):format(base.lemma))
			else
				error(("Internal error: Unrecognized gender '%s'"):format(base.gender))
			end
		end
		if lemma_determined and lemma_determined ~= lemma then
			error(("Attempt to set two different singular lemmas '%s' and '%s'"):format(lemma_determined, lemma))
		end
		lemma_determined = lemma
	end
	base.lemma = lemma_determined
end

-- For an adjectival lemma, synthesize the masc singular form.
-- Sets base.decl ("indecl" or "adj"), may rewrite base.lemma, creates base.stem_sets if missing and stores the
-- determined stem as both vowel_stem and nonvowel_stem of every stem set. Raises an error when the lemma's
-- ending does not fit its gender/number/animacy.
local function synthesize_adj_lemma(base)
	local stem
	if base.indecl then
		base.decl = "indecl"
		stem = base.lemma
	else
		-- Turn a possessive stem in -ov into the masculine singular in -ův.
		local function sub_ov(stem)
			stem = stem:gsub("ov$", "ův")
			return stem
		end
		-- Single-pass loop, used only so that `break` can leave the chain of tests early.
		while true do
			if base.number == "mn" then
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						if base.soft then
							-- nothing to do
						else
							if base.animacy ~= "wos" then
								error(("Masculine plural-only adjectival lemma '%s' ending in -í can only be animate unless '.soft' is specified"):format(base.lemma))
							end
							base.lemma = undo_second_palatalization(base, stem, "is adjective") .. "ý"
						end
						break
					end
					stem = rmatch(base.lemma, "^(.*)é$")
					if stem then
						if base.animacy == "wos" then
							error(("Masculine plural-only adjectival lemma '%s' ending in -é must be inanimate"):format(base.lemma))
						end
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)i$") or rmatch(base.lemma, "^(.*in)i$")
					if stem then
						if base.animacy ~= "wos" then
							error(("Masculine plural-only possessive adjectival lemma '%s' ending in -i must be animate"):format(base.lemma))
						end
						base.lemma = sub_ov(stem)
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$")
					if stem then
						if base.animacy == "wos" then
							error(("Masculine plural-only possessive adjectival lemma '%s' ending in -y must be inanimate"):format(base.lemma))
						end
						base.lemma = sub_ov(stem)
						break
					end
					if base.animacy == "wos" then
						error(("Animate masculine plural-only adjectival lemma '%s' should end in -í, -ovi or -ini"):format(base.lemma))
					elseif base.soft then
						error(("Soft masculine plural-only adjectival lemma '%s' should end in -í"):format(base.lemma))
					else
						error(("Inanimate masculine plural-only adjectival lemma '%s' should end in -é, -ovy or -iny"):format(base.lemma))
					end
				elseif base.gender == "ž" then
					stem = rmatch(base.lemma, "^(.*)é$") -- hard adjective
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$") -- possessive adjective
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Feminine plural-only adjectival lemma '%s' should end in -é, -í, -ovy or -iny"):format(base.lemma))
				else
					stem = rmatch(base.lemma, "^(.*)á$") -- hard adjective
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$") -- possessive adjective
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Neuter plural-only adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
				end
			else
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)[ýí]$") or rmatch(base.lemma, "^(.*)ův$") or rmatch(base.lemma, "^(.*)in$")
					if stem then
						break
					end
					error(("Masculine adjectival lemma '%s' should end in -ý, -í, -ův or -in"):format(base.lemma))
				elseif base.gender == "ž" then
					stem = rmatch(base.lemma, "^(.*)á$")
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$")
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Feminine adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
				else
					-- NOTE(review): the error message below lists -é, but no branch here accepts a lemma in -é
					-- (unlike the parallel branches for -á above). Confirm which of the two is intended.
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)o$") or rmatch(base.lemma, "^(.*in)o$")
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Neuter adjectival lemma '%s' should end in -é, -í, -ovo or -ino"):format(base.lemma))
				end
			end
		end
		base.decl = "adj"
	end

	-- Now set the stem sets if not given.
	if not base.stem_sets then
		base.stem_sets = {{reducible = false}}
	end
	for _, stems in ipairs(base.stem_sets) do
		-- Set the stems.
		stems.vowel_stem = stem
		stems.nonvowel_stem = stem
	end
end

-- Determine the declension based on the lemma, gender and number. The declension is set in base.decl. In the process,
-- we set either base.vowel_stem (if the lemma ends in a vowel) or base.nonvowel_stem (if the lemma does not end in a
-- vowel), which is used by determine_stems().
-- In some cases (specifically with certain foreign nouns), we set
-- base.lemma to a new value; this is as if the user specified 'decllemma:'.
local function determine_declension(base)
	if base.indecl then
		base.decl = "indecl"
		base.nonvowel_stem = base.lemma
		return
	end

	-- Determine declension
	-- `stem` must be local: it was previously assigned without a declaration, which created a global.
	local stem

	-- Lemmas in -a.
	stem = rmatch(base.lemma, "^(.*)a$")
	if stem then
		if base.gender == "m" then
			if base.animacy ~= "wos" then
				error("Masculine lemma in -a must be animate")
			end
			base.decl = "a-m"
		elseif base.gender == "ž" then
			-- Explicit indicators win; otherwise the final consonant decides.
			if base.hard then
				base.decl = "hard-f"
			elseif base.soft then
				base.decl = "soft-f"
			elseif base.adje then
				base.decl = "adje-f"
			elseif rfind(base.lemma, com.velar_c .. "a$") then
				base.decl = "velar-f"
			elseif rfind(base.lemma, "[czs]" .. "a$") then
				base.decl = "czs-f"
			elseif rfind(base.lemma, com.inherently_soft_c .. "a$") then
				base.decl = "soft-f"
			else
				base.decl = "hard-f"
			end
		elseif base.gender == "n" then
			if rfind(stem, "m$") then
				base.decl = "ma-n"
			else
				error("Lemma ending in -a and neuter must end in -ma")
			end
		end
		base.vowel_stem = stem
		return
	end

	-- Lemmas in -e. Gender is not consulted here; all three outcomes are neuter declensions.
	stem = rmatch(base.lemma, "^(.*)e$")
	if stem then
		if base.tstem then
			base.decl = "tstem-n"
		elseif base.adje then
			base.decl = "adje-n"
		else
			base.decl = "soft-n"
		end
		base.vowel_stem = stem
		return
	end

	-- Lemmas in -o.
	stem = rmatch(base.lemma, "^(.*)o$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[maestro]] m.
			base.decl = "o-m"
		elseif base.gender == "ž" then
			-- [[zoo]]; [[Žemaitsko]]?
			error("Feminine nouns in -o are indeclinable; use '.indecl' if needed")
		elseif base.hard then
			base.decl = "hard-n"
		elseif base.tstem then
			base.decl = "tstem-n"
		elseif base.nstem then
			base.decl = "nstem-n"
		elseif rfind(base.lemma, "[czs]" .. "o$") then
			base.decl = "czs-n"
		elseif rfind(base.lemma, com.inherently_soft_c .. "o$") then
			base.decl = "soft-n"
		elseif rfind(base.lemma, com.velar_c .. "o$") then
			base.decl = "velar-n"
		else
			base.decl = "hard-n"
		end
		base.vowel_stem = stem
		return
	end

	-- Lemmas in -i/-y.
	-- NOTE(review): base.decl is only set here for masculines flagged 'adje'; for any other combination this
	-- returns with base.decl unset. Confirm whether that case should raise an error.
	stem = rmatch(base.lemma, "^(.*)[iy]$")
	if stem then
		if base.gender == "m" then
			if base.adje then
				base.decl = "adje-m"
			end
		end
		base.vowel_stem = stem
		return
	end

	-- Lemmas ending in a consonant.
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" then
			if base.hard then
				base.decl = "hard-m"
			elseif base.soft then
				base.decl = "soft-m"
			elseif rfind(base.lemma, com.velar_c .. "$") then
				base.decl = "velar-m"
			elseif rfind(base.lemma, "[czs]" .. "$") then
				base.decl = "czs-m"
			elseif rfind(base.lemma, com.inherently_soft_c .. "$") then
				base.decl = "soft-m"
			else
				base.decl = "hard-m"
			end
		elseif base.gender == "ž" then
			if base.vstem then
				base.decl = "v-f"
				-- v-stems drop the final -ej; `stem` becomes nil if the lemma does not end in -ej.
				stem = rmatch(base.lemma, "^(.*)ej$")
			elseif base.soft then
				base.decl = "soft-f"
			elseif rfind(base.lemma, "[czs]" .. "$") then
				base.decl = "czs-f"
			else
				base.decl = "soft-f"
			end
		elseif base.gender == "n" then
			if base.foreign then
				stem = rmatch(base.lemma, "^(.*)um$") or rmatch(base.lemma, "^(.*)on$")
				if not stem then
					error("Unrecognized neuter foreign ending, should be -um or -on")
				end
				if base.hard then
					base.decl = "hard-n"
				elseif rfind(stem, "[eiuy]$") then
					base.decl = "semisoft-n"
				else
					base.decl = "hard-n"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem .. "o"
				base.vowel_stem = stem
				return
			else
				error("Neuter nouns ending in a consonant should use '.foreign' or '.decllemma:...'")
			end
		end
		base.nonvowel_stem = stem
		return
	end

	error("Unrecognized ending for lemma: '" .. base.lemma .. "'")
end

-- Determine the default value for the 'reducible' flag.
local function determine_default_reducible(base)
	-- Nouns in vowels other than -a/o as well as masculine nouns ending in all vowels don't have null endings so not
	-- reducible. Note, we are never called on adjectival nouns.
if rfind(base.lemma, "[iyuíeě]$") or base.gender == "m" and rfind(base.lemma, "[ao]$") or base.tstem then
		base.default_reducible = false
		return
	end

	local stem
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		-- Consonant-final lemma: reducible by default for polysyllabic masculines in -ec/-ek and feminines in -eń.
		if base.gender == "m" and rfind(stem, "e[ck]$") and not com.is_monosyllabic(stem) then
			base.default_reducible = true
		elseif base.gender == "ž" and rfind(stem, "eń$") then
			-- pěseń
			base.default_reducible = true
		else
			base.default_reducible = false
		end
		return
	end

	base.default_reducible = false
end

-- Determine the stems to use for each stem set: vowel and nonvowel stems, for singular
-- and plural. We assume that one of base.vowel_stem or base.nonvowel_stem has been
-- set in determine_declension(), depending on whether the lemma ends in
-- a vowel. We construct all the rest given the reducibility, vowel alternation spec and
-- any explicit stems given. We store the determined stems inside of the stem-set objects
-- in `base.stem_sets`, meaning that if the user gave multiple reducible or vowel-alternation
-- patterns, we will compute multiple sets of stems. The reason is that the stems may vary
-- depending on the reducibility and vowel alternation.
local function determine_stems(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	-- Fill in the default reducibility for every stem set that did not specify one.
	-- (A block that expanded "mixed" default reducibility into two stem sets used to follow this loop; its
	-- controlling flag was a constant false, so it could never run and has been removed.)
	for _, stems in ipairs(base.stem_sets) do
		if stems.reducible == nil then
			stems.reducible = base.default_reducible
		end
	end

	-- Now determine all the stems for each stem set.
	for _, stems in ipairs(base.stem_sets) do
		if base.vowel_stem then
			-- Lemma ends in a vowel: the vowel stem is given, the nonvowel stem is derived from it.
			stems.vowel_stem = base.vowel_stem
			stems.nonvowel_stem = stems.vowel_stem
			-- Apply vowel alternation first in cases like jádro -> jader; apply_vowel_alternation() will throw an error
			-- if the vowel being modified isn't the last vowel in the stem.
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.nonvowel_stem = dereduce(base, stems.nonvowel_stem)
				stems.oblique_nonvowel_stem = dereduce(base, stems.oblique_nonvowel_stem)
			end
		else
			-- Lemma ends in a consonant: the nonvowel stem is given, the vowel stem is derived from it.
			stems.nonvowel_stem = base.nonvowel_stem
			-- The user specified #. E.g. nóc nocy
			if stems.oblique_slots then
				stems.oblique_slots = "all"
			end
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.vowel_stem = com.reduce(base.nonvowel_stem)
				if not stems.vowel_stem then
					error("Unable to reduce stem '" .. base.nonvowel_stem .. "'")
				end
			else
				stems.vowel_stem = base.nonvowel_stem
			end
		end
		stems.oblique_vowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.vowel_stem)
	end
end

-- Work out declension and stems for one word spec, dispatching on its kind (pronoun, determiner, numeral,
-- adjectival, manual, or regular noun).
-- NOTE(review): determine_numeral_stems() is not defined in the part of the file reviewed here; confirm it exists.
local function detect_indicator_spec(base)
	if base.pron then
		determine_pronoun_stems(base)
	elseif base.det then
		determine_determiner_stems(base)
	elseif base.num then
		determine_numeral_stems(base)
	elseif base.adj then
		process_declnumber(base)
		synthesize_adj_lemma(base)
	elseif base.manual then
		if base.stem_sets then
			-- FIXME, maybe this should be allowed?
error("Reducible and vowel alternation specs cannot be given with manual declensions") end base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}} base.decl = "manual" else if base.number == "mn" then synthesize_singular_lemma(base) end determine_declension(base) determine_default_reducible(base) determine_stems(base) end end local function detect_all_indicator_specs(alternant_multiword_spec) alternant_multiword_spec.sg_genders = {} alternant_multiword_spec.pl_genders = {} iut.map_word_specs(alternant_multiword_spec, function(base) detect_indicator_spec(base) if base.number ~= "mn" then alternant_multiword_spec.sg_genders[base.actual_gender] = true end if base.number ~= "jed" then -- All t-stem masculines are neuter in the plural. local plgender plgender = base.actual_gender alternant_multiword_spec.pl_genders[plgender] = true end end) if (alternant_multiword_spec.saw_pron and 1 or 0) + (alternant_multiword_spec.saw_det and 1 or 0) + (alternant_multiword_spec.saw_num and 1 or 0) > 1 then error("Can't combine pronouns, determiners and/or numerals") end end local propagate_multiword_properties local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only) local seen_property for _, multiword_spec in ipairs(alternant_spec.alternants) do propagate_multiword_properties(multiword_spec, property, mixed_value, nouns_only) if seen_property == nil then seen_property = multiword_spec[property] elseif multiword_spec[property] and seen_property ~= multiword_spec[property] then seen_property = mixed_value end end alternant_spec[property] = seen_property end propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only) local seen_property = nil local last_seen_nounal_pos = 0 local word_specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs for i = 1, #word_specs do local is_nounal if word_specs[i].alternants then propagate_alternant_properties(word_specs[i], property, 
mixed_value) is_nounal = not not word_specs[i][property] elseif nouns_only then is_nounal = is_regular_noun(word_specs[i]) else is_nounal = not not word_specs[i][property] end if is_nounal then if not word_specs[i][property] then error("Internal error: noun-type word spec without " .. property .. " set") end for j = last_seen_nounal_pos + 1, i - 1 do word_specs[j][property] = word_specs[j][property] or word_specs[i][property] end last_seen_nounal_pos = i if seen_property == nil then seen_property = word_specs[i][property] elseif seen_property ~= word_specs[i][property] then seen_property = mixed_value end end end if last_seen_nounal_pos > 0 then for i = last_seen_nounal_pos + 1, #word_specs do word_specs[i][property] = word_specs[i][property] or word_specs[last_seen_nounal_pos][property] end end multiword_spec[property] = seen_property end local function propagate_properties_downward(alternant_multiword_spec, property, default_propval) local function set_and_fetch(obj, default) local retval if obj[property] then retval = obj[property] else obj[property] = default retval = default end if not obj["actual_" .. property] then obj["actual_" .. property] = retval end return retval end local propval1 = set_and_fetch(alternant_multiword_spec, default_propval) for _, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do local propval2 = set_and_fetch(alternant_or_word_spec, propval1) if alternant_or_word_spec.alternants then for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do local propval3 = set_and_fetch(multiword_spec, propval2) for _, word_spec in ipairs(multiword_spec.word_specs) do local propval4 = set_and_fetch(word_spec, propval3) if propval4 == "mixed" then -- FIXME, use clearer error message. error("Attempt to assign mixed " .. property .. " to word") end set_and_fetch(word_spec, propval4) end end else if propval2 == "mixed" then -- FIXME, use clearer error message. error("Attempt to assign mixed " .. property .. 
" to word") end set_and_fetch(alternant_or_word_spec, propval2) end end end --[=[ Propagate `property` (one of "animacy", "gender" or "number") from nouns to adjacent adjectives. We proceed as follows: 1. We assume the properties in question are already set on all nouns. This should happen in set_defaults_and_check_bad_indicators(). 2. We first propagate properties upwards and sideways. We recurse downwards from the top. When we encounter a multiword spec, we proceed left to right looking for a noun. When we find a noun, we fetch its property (recursing if the noun is an alternant), and propagate it to any adjectives to its left, up to the next noun to the left. When we have processed the last noun, we also propagate its property value to any adjectives to the right (to handle e.g. [[anděl strážný]] "guardian angel", where the adjective [[strážný]] should inherit the 'masculine' and 'animate' properties of [[anděl]]). Finally, we set the property value for the multiword spec itself by combining all the non-nil properties of the individual elements. If all non-nil properties have the same value, the result is that value, otherwise it is `mixed_value` (which is "mixed" for animacy and gender, but "allthree" for number). 3. When we encounter an alternant spec in this process, we recursively process each alternant (which is a multiword spec) using the previous step, and combine any non-nil properties we encounter the same way as for multiword specs. 4. The effect of steps 2 and 3 is to set the property of each alternant and multiword spec based on its children or its neighbors. 
]=] local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value) propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, "nouns only") propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, false) propagate_properties_downward(alternant_multiword_spec, property, default_propval) end local function determine_noun_status(alternant_multiword_spec) for i, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do if alternant_or_word_spec.alternants then local is_noun = false for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do for j, word_spec in ipairs(multiword_spec.word_specs) do if is_regular_noun(word_spec) then multiword_spec.first_noun = j is_noun = true break end end end if is_noun then alternant_multiword_spec.first_noun = i end elseif is_regular_noun(alternant_or_word_spec) then alternant_multiword_spec.first_noun = i return end end end -- Set the part of speech based on properties of the individual words. 
local function set_pos(alternant_multiword_spec)
	-- An explicit pos= argument wins. Otherwise the spec is a pronoun/determiner/numeral only if nothing but
	-- that kind of word was seen; everything else is a noun. `plpos` is the pluralized form used in categories.
	if alternant_multiword_spec.args.pos then
		alternant_multiword_spec.pos = alternant_multiword_spec.args.pos
	elseif alternant_multiword_spec.saw_pron and not alternant_multiword_spec.saw_non_pron then
		alternant_multiword_spec.pos = "pronoun"
	elseif alternant_multiword_spec.saw_det and not alternant_multiword_spec.saw_non_det then
		alternant_multiword_spec.pos = "determiner"
	elseif alternant_multiword_spec.saw_num and not alternant_multiword_spec.saw_non_num then
		alternant_multiword_spec.pos = "numeral"
	else
		alternant_multiword_spec.pos = "noun"
	end
	alternant_multiword_spec.plpos = require("Modul:string utilities").pluralize(alternant_multiword_spec.pos)
end

-- Fill in blank lemmas from `pagename` and set, on every word spec, `orig_lemma`, `orig_lemma_no_links`,
-- `actual_lemma` and `lemma` (the latter is `decllemma` when the user gave one).
local function normalize_all_lemmas(alternant_multiword_spec, pagename)
	iut.map_word_specs(alternant_multiword_spec, function(base)
		if base.lemma == "" then
			base.lemma = pagename
		end
		base.orig_lemma = base.lemma
		base.orig_lemma_no_links = m_links.remove_links(base.lemma)
		local lemma = base.orig_lemma_no_links
		-- If the lemma is all-uppercase, lowercase it but note this, so that later in combine_stem_ending() we convert it
		-- back to uppercase. This allows us to handle all-uppercase acronyms without a lot of extra complexity.
		-- FIXME: This may not make sense at all.
		if uupper(lemma) == lemma then
			base.all_uppercase = true
			lemma = ulower(lemma)
		end
		base.actual_lemma = lemma
		base.lemma = base.decllemma or lemma
	end)
end

-- Decline a single word spec: run the declension function for every stem set, apply derived slots and
-- overrides, fill the syncretic dual slots, and finally reconcile the declined number (`base.number`) with the
-- number to be displayed (`base.actual_number`) by copying and/or erasing whole number columns.
-- Raises an internal error if `base.decl` names no known declension.
local function decline_noun(base)
	for _, stems in ipairs(base.stem_sets) do
		if not decls[base.decl] then
			error("Internal error: Unrecognized declension type '" .. base.decl .. "'")
		end
		decls[base.decl](base, stems)
	end
	handle_derived_slots_and_overrides(base)

	local function copy(from_slot, to_slot)
		base.forms[to_slot] = base.forms[from_slot]
	end

	-- Dual syncretism: vocative = nominative, locative and instrumental = dative; for non-masculines the
	-- accusative equals the nominative as well.
	if base.gender ~= "m" then
		copy("nom_d", "acc_d")
	end
	copy("nom_d", "voc_d")
	copy("dat_d", "loc_d")
	copy("dat_d", "ins_d")

	if base.actual_number ~= base.number then
		local source_num = base.number == "jed" and "_s" or base.number == "dw" and "_d" or "_p"
		local dest_nums = base.number == "jed" and {"_p", "_d"} or base.number == "dw" and {"_s", "_p"} or
			{"_s", "_d"}
		-- The destination/erase suffixes are lists, so each suffix must be handled in turn; concatenating the
		-- list itself onto a case name raises "attempt to concatenate a table value".
		for _, dest_num in ipairs(dest_nums) do
			for case in pairs(cases) do
				copy(case .. source_num, case .. dest_num)
			end
			copy("nom" .. source_num .. "_linked", "nom" .. dest_num .. "_linked")
		end
		if base.actual_number ~= "allthree" then
			local erase_nums = base.actual_number == "jed" and {"_d", "_p"} or
				base.actual_number == "dw" and {"_s", "_p"} or {"_s", "_d"}
			for _, erase_num in ipairs(erase_nums) do
				for case in pairs(cases) do
					base.forms[case .. erase_num] = nil
				end
				base.forms["nom" .. erase_num .. "_linked"] = nil
			end
		end
	end
end

-- Variant codes are currently disabled; always returns nil.
local function get_variants(form)
	return nil
	--[=[
	FIXME
	return
		form:find(com.VAR1) and "var1" or
		form:find(com.VAR2) and "var2" or
		form:find(com.VAR3) and "var3" or
		nil
	]=]
end

-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
-- Results are stored in `alternant_multiword_spec.categories` and `alternant_multiword_spec.annotation`.
local function compute_categories_and_annotation(alternant_multiword_spec)
	local all_cats = {}
	local function insert(cattype)
		m_table.insertIfNot(all_cats, "Upper Sorbian " .. cattype)
	end
	if alternant_multiword_spec.pos == "noun" then
		if alternant_multiword_spec.actual_number == "jed" then
			insert("uncountable nouns")
		elseif alternant_multiword_spec.actual_number == "dw" then
			insert("dualia tantum")
		elseif alternant_multiword_spec.actual_number == "mn" then
			insert("pluralia tantum")
		end
	end
	local annparts = {}
	local decldescs = {}
	local vowelalts = {}
	local foreign = {}
	local irregs = {}
	local stemspecs = {}
	local reducible = nil

	-- Map the module's gender/animacy codes to English descriptions. The codes used elsewhere in this module
	-- are "m"/"ž"/"n" for gender and "wos"/"zwj"/"wěc" for animacy; the older keys are kept so that any caller
	-- still passing them gets the same result as before.
	local gender_code_to_desc = {
		m = "masculine",
		["ž"] = "feminine",
		f = "feminine",
		n = "neuter",
	}
	local animacy_code_to_desc = {
		wos = "personal",
		zwj = "animal",
		["wěc"] = "inanimate",
		pr = "personal",
		anml = "animal",
		inan = "inanimate",
	}

	-- Return e.g. "masculine personal", "feminine", or "" when neither code is known.
	local function get_genanim(gender, animacy)
		local descs = {}
		local gender_desc = gender ~= nil and gender_code_to_desc[gender] or nil
		if gender_desc then
			table.insert(descs, gender_desc)
		end
		if gender ~= "ž" and gender ~= "n" then
			-- masculine or "none" (e.g. certain pronouns and numerals)
			local animacy_desc = animacy ~= nil and animacy_code_to_desc[animacy] or nil
			if animacy_desc then
				table.insert(descs, animacy_desc)
			end
		end
		return table.concat(descs, " ")
	end

	-- Collapse runs of spaces and strip leading/trailing whitespace.
	local function trim(text)
		text = text:gsub(" +", " ")
		return mw.text.trim(text)
	end

	local function do_word_spec(base)
		local actual_genanim = get_genanim(base.actual_gender, base.actual_animacy)
		local genanim = actual_genanim
		if base.actual_gender == "m" then
			insert(actual_genanim .. " " .. alternant_multiword_spec.plpos)
		end
		for _, stems in ipairs(base.stem_sets) do
			local props = declprops[base.decl]
			local cats = props.cat
			if type(cats) == "function" then
				cats = cats(base, stems)
			end
			if type(cats) == "string" then
				cats = {cats}
			end
			local default_desc
			for i, cat in ipairs(cats) do
				cat = cat:gsub("GENPOS", "GENDER POS")
				if not cat:find("POS") then
					cat = cat .. " POS"
				end
				if i == #cats then
					-- The last category, minus the part of speech, doubles as the default description.
					default_desc = cat:gsub(" POS", "")
				end
				cat = cat:gsub("GENDER", actual_genanim)
				cat = cat:gsub("POS", alternant_multiword_spec.plpos)
				-- Need to trim `cat` because actual_genanim may be an empty string.
				insert(trim(cat))
			end

			local desc = props.desc
			if type(desc) == "function" then
				desc = desc(base, stems)
			end
			desc = desc or default_desc
			desc = desc:gsub("GENDER", genanim)
			-- Need to trim `desc` because genanim may be an empty string.
			m_table.insertIfNot(decldescs, trim(desc))

			local vowelalt
			if stems.vowelalt == "quant" then
				vowelalt = "quant-alt"
				insert("nouns with quantitative vowel alternation")
			elseif stems.vowelalt == "quant-ě" then
				vowelalt = "í-ě-alt"
				insert("nouns with í-ě alternation")
			end
			if vowelalt then
				m_table.insertIfNot(vowelalts, vowelalt)
			end
			if reducible == nil then
				reducible = stems.reducible
			end
			if stems.reducible then
				insert("nouns with reducible stem")
			end
			if base.foreign then
				m_table.insertIfNot(foreign, "foreign")
				if not base.decllemma then
					-- NOTE: there are nouns that use both 'foreign' and 'decllemma', e.g. [[Zeus]].
					insert("nouns with regular foreign declension")
				end
			end
			-- User-specified 'decllemma:' indicates irregular stem. Don't consider foreign nouns in -us/-os/-es, -um/-on or
			-- silent -e (e.g. [[software]]) where this ending is simply dropped in oblique and plural forms as irregular;
			-- there are too many of these and they are already categorized above as 'nouns with regular foreign declension'.
			if base.decllemma then
				m_table.insertIfNot(irregs, "irreg-stem")
				insert("nouns with irregular stem")
			end
			m_table.insertIfNot(stemspecs, stems.vowel_stem)
		end
	end

	-- Categorize based on the first noun (or the first word if no noun was identified).
	local key_entry = alternant_multiword_spec.first_noun or 1
	if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
		local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
		if alternant_or_word_spec.alternants then
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				key_entry = multiword_spec.first_noun or 1
				if #multiword_spec.word_specs >= key_entry then
					do_word_spec(multiword_spec.word_specs[key_entry])
				end
			end
		else
			do_word_spec(alternant_or_word_spec)
		end
	end

	if alternant_multiword_spec.actual_number == "jed" or alternant_multiword_spec.actual_number == "mn" or
		alternant_multiword_spec.actual_number == "dw" then
		-- not "allthree" or "none" (for [[sebe]])
		table.insert(annparts, alternant_multiword_spec.actual_number == "jed" and "sg-only" or
			alternant_multiword_spec.actual_number == "dw" and "du-only" or "pl-only")
	end
	if #decldescs == 0 then
		table.insert(annparts, "indecl")
	else
		table.insert(annparts, table.concat(decldescs, " // "))
	end
	if #vowelalts > 0 then
		table.insert(annparts, table.concat(vowelalts, "/"))
	end
	if reducible == "mixed" then
		table.insert(annparts, "mixed-reducible")
	elseif reducible then
		table.insert(annparts, "reducible")
	end
	if #foreign > 0 then
		table.insert(annparts, table.concat(foreign, " // "))
	end
	if #irregs > 0 then
		table.insert(annparts, table.concat(irregs, " // "))
	end
	alternant_multiword_spec.annotation = table.concat(annparts, " ")
	if #stemspecs > 1 then
		insert("nouns with multiple stems")
	end
	if alternant_multiword_spec.actual_number == "allthree" and
		not m_table.deepEquals(alternant_multiword_spec.sg_genders, alternant_multiword_spec.pl_genders) then
		insert("nouns that change gender in the plural")
	end
	alternant_multiword_spec.categories = all_cats
end

-- Collect the lemma forms from the first populated slot in `potential_lemma_slots` and hand all forms to
-- iut.show_forms() for display formatting.
local function show_forms(alternant_multiword_spec)
	local lemmas = {}
	for _, slot in ipairs(potential_lemma_slots) do
		if alternant_multiword_spec.forms[slot] then
			for _, formobj in ipairs(alternant_multiword_spec.forms[slot]) do
				-- FIXME, now can support footnotes as qualifiers in headwords?
				table.insert(lemmas, formobj.form)
			end
			break
		end
	end
	local props = {
		lemmas = lemmas,
		slot_table = alternant_multiword_spec.output_noun_slots,
		lang = lang,
		canonicalize = function(form)
			-- return com.remove_variant_codes(form)
			return form
		end,
	}
	iut.show_forms(alternant_multiword_spec.forms, props)
end

-- Build the wikitext of the declension table. Picks the three-number layout for "allthree", otherwise a
-- one-number layout (with a clitic column when `has_clitic` is set), and substitutes the formatted forms.
-- The templates are wiki table markup, which is line-oriented: every row separator (`|-`), header (`!`) and
-- cell (`|`) must start its own line.
local function make_table(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms

	local function template_prelude(min_width)
		return rsub([=[
<div>
<div class="NavFrame" style="max-width: MINWIDTHem">
<div class="NavHead" style="background:#eff7ff">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:100%;display:table" class="inflection-table"
|-
]=], "MINWIDTH", min_width)
	end

	local function template_postlude()
		return [=[
|{\cl}{notes_clause}</div></div></div>]=]
	end

	local table_spec_allthree = template_prelude("45") .. [=[
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | jednota
! style="background:#d9ebff" | dwojota
! style="background:#d9ebff" | mnohota
|-
!style="background:#eff7ff"|mjenowak
| {nom_s}
| {nom_d}
| {nom_p}
|-
!style="background:#eff7ff"|rodźak
| {gen_s}
| {gen_d}
| {gen_p}
|-
!style="background:#eff7ff"|dawak
| {dat_s}
| {dat_d}
| {dat_p}
|-
!style="background:#eff7ff"|žadak
| {acc_s}
| {acc_d}
| {acc_p}
|-
!style="background:#eff7ff"|posrědnik
| {ins_s}
| {ins_d}
| {ins_p}
|-
!style="background:#eff7ff"|měsćak
| {loc_s}
| {loc_d}
| {loc_p}
|-
!style="background:#eff7ff"|wołak
| {voc_s}
| {voc_d}
| {voc_p}
]=] .. template_postlude()

	local function get_table_spec_one_number(number, numcode)
		local table_spec_one_number = [=[
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | NUMBER
|-
!style="background:#eff7ff"|mjenowak
| {nom_CODE}
|-
!style="background:#eff7ff"|rodźak
| {gen_CODE}
|-
!style="background:#eff7ff"|dawak
| {dat_CODE}
|-
!style="background:#eff7ff"|žadak
| {acc_CODE}
|-
!style="background:#eff7ff"|posrědnik
| {ins_CODE}
|-
!style="background:#eff7ff"|měsćak
| {loc_CODE}
|-
!style="background:#eff7ff"|wołak
| {voc_CODE}
]=]
		return template_prelude("30") .. table_spec_one_number:gsub("NUMBER", number):gsub("CODE", numcode) ..
			template_postlude()
	end

	local function get_table_spec_one_number_clitic(number, numcode)
		local table_spec_one_number_clitic = [=[
! rowspan=2 style="width:33%;background:#d9ebff"|
! colspan=2 style="background:#d9ebff" | NUMBER
|-
! style="width:33%;background:#d9ebff" | stressed
! style="background:#d9ebff" | clitic
|-
!style="background:#eff7ff"|mjenowak
| colspan=2 | {nom_CODE}
|-
!style="background:#eff7ff"|rodźak
| {gen_CODE}
| {clitic_gen_CODE}
|-
!style="background:#eff7ff"|dawak
| {dat_CODE}
| {clitic_dat_CODE}
|-
!style="background:#eff7ff"|žadak
| {acc_CODE}
| {clitic_acc_CODE}
|-
!style="background:#eff7ff"|posrědnik
| colspan=2 | {ins_CODE}
|-
!style="background:#eff7ff"|měsćak
| colspan=2 | {loc_CODE}
|-
!style="background:#eff7ff"|wołak
| colspan=2 | {voc_CODE}
]=]
		return template_prelude("40") ..
			table_spec_one_number_clitic:gsub("NUMBER", number):gsub("CODE", numcode) .. template_postlude()
	end

	local notes_template = [=[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]

	if alternant_multiword_spec.title then
		forms.title = alternant_multiword_spec.title
	else
		forms.title = 'Skłonjowanje <i lang="hsb">' .. forms.lemma .. '</i>'
	end

	local annotation = alternant_multiword_spec.annotation
	if annotation == "" then
		forms.annotation = ""
	else
		forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
	end

	-- Column heading and slot suffix for the one-number layouts.
	local number, numcode
	if alternant_multiword_spec.actual_number == "jed" then
		number, numcode = "jednota", "s"
	elseif alternant_multiword_spec.actual_number == "dw" then
		number, numcode = "dwojota", "d"
	elseif alternant_multiword_spec.actual_number == "mn" then
		number, numcode = "mnohota", "p"
	elseif alternant_multiword_spec.actual_number == "none" then -- used for [[sebe]]
		number, numcode = "", "s"
	end

	local table_spec =
		alternant_multiword_spec.actual_number == "allthree" and table_spec_allthree or
		alternant_multiword_spec.has_clitic and get_table_spec_one_number_clitic(number, numcode) or
		get_table_spec_one_number(number, numcode)
	forms.notes_clause = forms.footnote ~= "" and
		m_string_utilities.format(notes_template, forms) or ""
	return m_string_utilities.format(table_spec, forms)
end

-- Return the list of headword gender specs, one per distinct GENDER-ANIMACY[-NUMBER] combination found among
-- the word specs. The animacy code "wěc" is emitted as "in"; the number suffix is "-p" for plural-only,
-- "-d" for dual-only and empty otherwise.
local function compute_headword_genders(alternant_multiword_spec)
	local genders = {}
	local number
	if alternant_multiword_spec.actual_number == "mn" then
		number = "-p"
	elseif alternant_multiword_spec.actual_number == "dw" then
		number = "-d"
	else
		number = ""
	end
	iut.map_word_specs(alternant_multiword_spec, function(base)
		local animacy = base.animacy
		if animacy == "wěc" then
			animacy = "in"
		end
		m_table.insertIfNot(genders, base.gender .. "-" .. animacy .. number)
	end)
	return genders
end

-- Externally callable function to parse and decline a noun given user-specified arguments.
-- Return value is ALTERNANT_MULTIWORD_SPEC, an object where the declined forms are in
-- `ALTERNANT_MULTIWORD_SPEC.forms` for each slot. If there are no values for a slot, the
-- slot key will be missing. The value for a given slot is a list of objects
-- {form=FORM, footnotes=FOOTNOTES}.
function export.do_generate_forms(parent_args, from_headword)
	-- Parameters accepted by the declension template itself.
	local param_specs = {
		[1] = {required = true, default = "žona<f>"},
		title = {},
		pagename = {},
		json = {type = "boolean"},
		pos = {},
	}
	-- Extra parameters that only exist when we are invoked on behalf of the headword template.
	if from_headword then
		for _, list_param in ipairs({"head", "lemma", "g", "ž", "m", "adj", "dim"}) do
			param_specs[list_param] = {list = true}
		end
		param_specs["id"] = {}
	end
	local args = m_para.process(parent_args, param_specs)

	local spec = iut.parse_inflected_text(args[1], {
		parse_indicator_spec = parse_indicator_spec,
		angle_brackets_omittable = true,
		allow_blank_lemma = true,
	})
	spec.title = args.title
	spec.args = args

	-- Page name precedence: explicit pagename=, then the first head= (headword calls only), then the current
	-- page's subpage text.
	local pagename = args.pagename
	if not pagename and from_headword then
		pagename = args.head[1]
	end
	if not pagename then
		pagename = mw.title.getCurrentTitle().subpageText
	end

	normalize_all_lemmas(spec, pagename)
	set_all_defaults_and_check_bad_indicators(spec)
	-- These need to happen before detect_all_indicator_specs() so that adjectives get their genders and numbers set
	-- appropriately, which are needed to correctly synthesize the adjective lemma.
	propagate_properties(spec, "animacy", "wěc", "mixed")
	propagate_properties(spec, "number", "allthree", "allthree")
	-- FIXME, the default value (third param) used to be 'm' with a comment indicating that this applied only to
	-- plural adjectives, where it didn't matter; but here, plural adjectives are distinguished for gender and
	-- animacy. Make sure 'mixed' works.
	propagate_properties(spec, "gender", "mixed", "mixed")
	detect_all_indicator_specs(spec)
	-- Propagate 'actual_number' after calling detect_all_indicator_specs(), which sets 'actual_number' for adjectives.
	propagate_properties(spec, "actual_number", "allthree", "allthree")
	determine_noun_status(spec)
	set_pos(spec)
	spec.output_noun_slots = get_output_noun_slots(spec)

	-- Slots are skipped according to the number that will actually be displayed.
	local function skip_for_actual_number(slot)
		return skip_slot(spec.actual_number, slot)
	end
	iut.inflect_multiword_or_alternant_multiword_spec(spec, {
		skip_slot = skip_for_actual_number,
		slot_table = spec.output_noun_slots,
		get_variants = get_variants,
		inflect_word_spec = decline_noun,
	})

	compute_categories_and_annotation(spec)
	spec.genders = compute_headword_genders(spec)

	if not args.json then
		return spec
	end
	-- JSON output: drop the processed arguments before serializing.
	spec.args = nil
	return require("Modul:JSON").toJSON(spec)
end

-- Entry point for {{hsb-ndecl}}. Template-callable function to parse and decline a noun given
-- user-specified arguments and generate a displayable table of the declined forms.
function export.show(frame)
	local result = export.do_generate_forms(frame:getParent().args)
	if type(result) == "string" then
		-- JSON return value
		return result
	end
	show_forms(result)
	local table_text = make_table(result)
	local category_text = require("Modul:utilities").format_categories(result.categories, lang, nil, nil, force_cat)
	return table_text .. category_text
end

return export
0b5tembci6q0pzquj4l8ipy1nttusbi 17587 17584 2024-11-22T09:25:29Z Sławobóg 2519 17587 Scribunto text/plain
local export = {}

--[=[
Authorship: Zhnka
]=]

--[=[
TERMINOLOGY:

-- "slot" = A particular combination of case/number.
	 Example slot names for nouns are "gen_s" (genitive singular) and
	 "voc_p" (vocative plural). Each slot is filled with zero or more forms.

-- "form" = The declined form representing the value of a given slot.

-- "lemma" = The dictionary form. Generally the nominative singular, but may occasionally be another form
	 (see `potential_lemma_slots` below) if the nominative singular is missing.
]=]

local lang = require("Modul:languages").getByCode("hsb")
local m_table = require("Modul:table")
local m_links = require("Modul:links")
local m_string_utilities = require("Modul:string utilities")
local iut = require("Modul:inflection utilities")
local m_para = require("Modul:parameters")
local com = require("Modul:hsb-common")

local current_title = mw.title.getCurrentTitle()
local NAMESPACE = current_title.nsText
local PAGENAME = current_title.text

local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower

local force_cat = false -- set to true to make categories appear in non-mainspace pages, for testing


-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end

-- version of rsubn() that returns a 2nd argument boolean indicating whether
-- a substitution was made.
local function rsubb(term, foo, bar)
	local retval, nsubs = rsubn(term, foo, bar)
	return retval, nsubs > 0
end

-- Record a tracking hit under "hsb-noun/<track_id>"; always returns true so it can be used in conditions.
local function track(track_id)
	require("Modul:debug/track")("hsb-noun/" .. track_id)
	return true
end

-- Slot name -> accelerator tag spec (CASE|NUMBER) for every case in singular, dual and plural.
local output_noun_slots = {
	nom_s = "nom|s",
	gen_s = "gen|s",
	dat_s = "dat|s",
	acc_s = "acc|s",
	voc_s = "voc|s",
	loc_s = "loc|s",
	ins_s = "ins|s",
	nom_d = "nom|d",
	gen_d = "gen|d",
	dat_d = "dat|d",
	acc_d = "acc|d",
	voc_d = "voc|d",
	loc_d = "loc|d",
	ins_d = "ins|d",
	nom_p = "nom|p",
	gen_p = "gen|p",
	dat_p = "dat|p",
	acc_p = "acc|p",
	voc_p = "voc|p",
	loc_p = "loc|p",
	ins_p = "ins|p",
}

-- Return the slot table to use for `alternant_multiword_spec`. For nouns displayed in all three numbers this
-- is `output_noun_slots` itself. For single-number nouns the number tag is dropped from every accelerator
-- spec, including the dual tag "|d". A fresh table is returned in that case so that the shared module-level
-- table is never modified and repeated calls within one Lua invocation stay independent.
local function get_output_noun_slots(alternant_multiword_spec)
	if alternant_multiword_spec.actual_number == "allthree" then
		return output_noun_slots
	end
	local slots = {}
	for slot, accel_form in pairs(output_noun_slots) do
		-- Parenthesized to keep only the first return value of gsub().
		slots[slot] = (accel_form:gsub("|[sdp]$", ""))
	end
	return slots
end

-- Slots tried in order when looking for the lemma form.
local potential_lemma_slots = {"nom_s", "nom_p", "gen_s"}

local cases = {
	nom = true,
	gen = true,
	dat = true,
	acc = true,
	voc = true,
	loc = true,
	ins = true,
}

local clitic_cases = {
	gen = true,
	dat = true,
	acc = true,
}

-- Wrapper around com.dereduce() that raises an error instead of returning nil.
local function dereduce(base, stem)
	local dereduced_stem = com.dereduce(base, stem)
	if not dereduced_stem then
		error("Unable to dereduce stem '" .. stem .. "'")
	end
	return dereduced_stem
end

-- Return a truthy value if `slot` should not be generated for a noun restricted to `number`.
-- NOTE(review): only singular-only ("jed", skips plural slots) and plural-only ("mn", skips singular slots)
-- are handled; dual slots are never skipped and dual-only ("dw") skips nothing. Confirm whether this is
-- intended before changing it, since lemma detection via `potential_lemma_slots` depends on which slots exist.
local function skip_slot(number, slot)
	return number == "jed" and rfind(slot, "_p$") or
		number == "mn" and rfind(slot, "_s$")
end

-- Basic function to combine stem(s) and ending(s) and insert the result into the appropriate slot. `stems` is either
-- the `stems` object passed into the declension functions (containing the various stems; see below) or a string to
-- override the stem. (NOTE: If you pass a string in as `stems`, you should pass the value of `stems.footnotes` as the
-- value of `footnotes` as it will be lost otherwise. If you need to supply your own footnote in addition, use
-- iut.combine_footnotes() to combine any user-specified footnote(s) with your footnote(s).) `endings` is either a
-- string specifying a single ending or a list of endings. If `endings` is nil, no forms are inserted. If an ending is
-- "-", the value of `stems` is ignored and the lemma is used instead as the stem; this is important in case the user
-- used `decllemma:` to specify a declension lemma different from the actual lemma, or specified '.foreign' (which has
-- a similar effect).
local function add(base, slot, stems, endings, footnotes)
	if not endings then
		return
	end
	-- Call skip_slot() based on the declined number; if the actual number is different, we correct this in
	-- decline_noun() at the end.
	if skip_slot(base.number, slot) then
		return
	end
	local stems_footnotes = type(stems) == "table" and stems.footnotes or nil
	footnotes = iut.combine_footnotes(iut.combine_footnotes(base.footnotes, stems_footnotes), footnotes)
	if type(endings) == "string" then
		endings = {endings}
	end
	for _, ending in ipairs(endings) do
		-- Compute the stem. If ending is "-", use the lemma regardless. Otherwise if `stems` is a string, use it.
		-- Otherwise `stems` is an object containing four stems (vowel-vs-non-vowel cross regular-vs-oblique);
		-- compute the appropriate stem based on the slot and whether the ending begins with a vowel.
		local stem
		if ending == "-" then
			stem = base.actual_lemma
			ending = ""
		elseif type(stems) == "string" then
			stem = stems
		else
			local is_vowel_ending = rfind(ending, "^" .. com.vowel_c)
			if stems.oblique_slots == "all" then
				if is_vowel_ending then
					stem = stems.oblique_vowel_stem
				else
					stem = stems.oblique_nonvowel_stem
				end
			elseif is_vowel_ending then
				stem = stems.vowel_stem
			else
				stem = stems.nonvowel_stem
			end
		end
		ending = iut.combine_form_and_footnotes(ending, footnotes)
		local function combine_stem_ending(stem, ending)
			return com.combine_stem_ending(base, slot, stem, ending)
		end
		iut.add_forms(base.forms, slot, stem, ending, combine_stem_ending)
	end
end

-- Apply the user's slot overrides for every slot accepted by the predicate `do_slot`. A "full" override
-- replaces the slot with the given forms; otherwise the override values are treated as endings and run
-- through add() for every stem set. Overridden slots are recorded in `base.slot_overridden`.
-- Raises an error if an override targets a slot excluded by the noun's number restriction.
local function process_slot_overrides(base, do_slot)
	for slot, overrides in pairs(base.overrides) do
		-- Call skip_slot() based on the declined number; if the actual number is different, we correct this in
		-- decline_noun() at the end.
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number ..
				"' number restriction")
		end
		if do_slot(slot) then
			base.slot_overridden[slot] = true
			base.forms[slot] = nil
			for _, override in ipairs(overrides) do
				for _, value in ipairs(override.values) do
					local form = value.form
					local combined_notes = iut.combine_footnotes(base.footnotes, value.footnotes)
					if override.full then
						if form ~= "" then
							iut.insert_form(base.forms, slot, {form = form, footnotes = combined_notes})
						end
					else
						-- Convert a null ending to "-" in the acc/voc sg slots so that e.g. [[Kerberos]] declared as
						-- <m.sg.foreign.gena:u.acc-:a> works correctly and generates accusative 'Kerberos/Kerbera' not
						-- #'Kerber/Kerbera'.
						if (slot == "acc_s" or slot == "voc_s") and form == "" then
							form = "-"
						end
						for _, stems in ipairs(base.stem_sets) do
							add(base, slot, stems, form, combined_notes)
						end
					end
				end
			end
		end
	end
end

-- Add the endings for every explicitly listed slot. The nominative singular always receives the lemma ("-");
-- the optional trailing `nom_s` argument adds further nominative singular endings on top of that. Slots whose
-- ending argument is nil are left alone (see add()).
local function add_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
	nom_d, gen_d, dat_d,
	nom_p, gen_p, dat_p, acc_p, loc_p, ins_p,
	nom_s, footnotes
)
	add(base, "nom_s", stems, "-", footnotes)
	add(base, "gen_s", stems, gen_s, footnotes)
	add(base, "dat_s", stems, dat_s, footnotes)
	add(base, "acc_s", stems, acc_s, footnotes)
	add(base, "voc_s", stems, voc_s, footnotes)
	add(base, "loc_s", stems, loc_s, footnotes)
	add(base, "ins_s", stems, ins_s, footnotes)
	add(base, "nom_d", stems, nom_d, footnotes)
	add(base, "gen_d", stems, gen_d, footnotes)
	add(base, "dat_d", stems, dat_d, footnotes)
	if base.number == "mn" then
		-- If this is a plurale tantum noun and we're processing the nominative plural, use the user-specified lemma
		-- rather than generating the plural from the synthesized singular, which may not match the specified lemma
		-- (e.g. [[tvargle]] "Olomouc cheese" using <m.pl.mixed> would try to generate 'tvargle/tvargly', and [[peníze]]
		-- "money" using <m.pl.#ě.genpl-> would try to generate 'peněze').
		local acc_p_like_nom = m_table.deepEquals(nom_p, acc_p)
		nom_p = "-"
		if acc_p_like_nom then
			acc_p = "-"
		end
	end
	add(base, "nom_p", stems, nom_p, footnotes)
	add(base, "gen_p", stems, gen_p, footnotes)
	add(base, "dat_p", stems, dat_p, footnotes)
	add(base, "acc_p", stems, acc_p, footnotes)
	add(base, "loc_p", stems, loc_p, footnotes)
	add(base, "ins_p", stems, ins_p, footnotes)
	add(base, "nom_s", stems, nom_s, footnotes)
end

-- The three wrappers below fill in nil for every slot they do not cover. add_decl() takes `nom_s` as its
-- second-to-last parameter, so an explicit nil must be passed for it; otherwise `footnotes` would land in the
-- `nom_s` position and be treated as a nominative singular ending instead of as footnotes.

-- Singular-only declension.
local function add_sg_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, footnotes
)
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nil, nil, nil,
		nil, nil, nil, nil, nil, nil,
		nil, footnotes)
end

-- Dual-only declension; the nominative dual is the lemma.
local function add_du_only_decl(base, stems,
	gen_d, dat_d, footnotes
)
	add_decl(base, stems, nil, nil, nil, nil, nil, nil,
		"-", gen_d, dat_d,
		nil, nil, nil, nil, nil, nil,
		nil, footnotes)
end

-- Plural-only declension; the nominative plural is the lemma.
local function add_pl_only_decl(base, stems,
	gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	add_decl(base, stems, nil, nil, nil, nil, nil, nil,
		nil, nil, nil,
		"-", gen_p, dat_p, acc_p, loc_p, ins_p,
		nil, footnotes)
end

-- Apply overrides and fill in the slots that are derived from other slots (vocative plural, the accusatives
-- and the clitic accusative singular), then compute the "_linked" variants of the potential lemma slots.
local function handle_derived_slots_and_overrides(base)
	local function is_non_derived_slot(slot)
		return slot ~= "voc_p" and slot ~= "acc_s" and slot ~= "clitic_acc_s"
	end

	local function is_derived_slot(slot)
		return not is_non_derived_slot(slot)
	end

	base.slot_overridden = {}
	-- Handle overrides for the non-derived slots. Do this before generating the derived
	-- slots so overrides of the source slots (e.g. nom_p) propagate to the derived slots.
	process_slot_overrides(base, is_non_derived_slot)

	-- Generate the remaining slots that are derived from other slots.
	if not base.pron and not base.det then
		-- Pronouns don't have a vocative (singular or plural).
		iut.insert_forms(base.forms, "voc_p", base.forms.nom_p)
	end
	-- The accusative follows the nominative or the genitive depending on animacy ("wěc", "wos", "zwj"):
	-- in the singular both "wos" and "zwj" take the genitive; in the dual and plural only "wos" does.
	if not base.forms.acc_s and not base.slot_overridden.acc_s then
		iut.insert_forms(base.forms, "acc_s", base.forms[base.animacy == "wěc" and "nom_s" or base.animacy == "wos" and "gen_s" or base.animacy == "zwj" and "gen_s"])
	end
	if not base.forms.acc_d and not base.slot_overridden.acc_d then
		iut.insert_forms(base.forms, "acc_d", base.forms[base.animacy == "wěc" and "nom_d" or base.animacy == "wos" and "gen_d" or base.animacy == "zwj" and "nom_d"])
	end
	if not base.forms.acc_p and not base.slot_overridden.acc_p then
		iut.insert_forms(base.forms, "acc_p", base.forms[base.animacy == "wěc" and "nom_p" or base.animacy == "wos" and "gen_p" or base.animacy == "zwj" and "nom_p"])
	end
	if not base.forms.clitic_acc_s and not base.slot_overridden.clitic_acc_s then
		iut.insert_forms(base.forms, "clitic_acc_s", base.forms[base.animacy == "wěc" and "nom_s" or "clitic_gen_s"])
	end

	-- Handle overrides for derived slots, to allow them to be overridden.
	process_slot_overrides(base, is_derived_slot)

	-- Compute linked versions of potential lemma slots, for use in {{hsb-noun}}.
	-- We substitute the original lemma (before removing links) for forms that
	-- are the same as the lemma, if the original lemma has links.
	for _, slot in ipairs(potential_lemma_slots) do
		iut.insert_forms(base.forms, slot .. "_linked", iut.map_forms(base.forms[slot], function(form)
			if form == base.orig_lemma_no_links and rfind(base.orig_lemma, "%[%[") then
				return base.orig_lemma
			else
				return form
			end
		end))
	end
end

-- Table mapping declension types to functions to decline the noun. The function takes two arguments, `base` and
-- `stems`; the latter specifies the computed stems (vowel vs. non-vowel, singular vs. plural) and whether the noun
-- is reducible and/or has vowel alternations in the stem.
Most of the specifics of determining which stem to use -- and how to modify it for the given ending are handled in add_decl(); the declension functions just need to generate -- the appropriate endings. local decls = {} -- Table specifying additional properties for declension types. Every declension type must have such a table, which -- specifies which category or categories to add and what annotation to show in the title bar of the declension table. -- -- * Only the `cat` property of this table is mandatory; there is also a `desc` property to specify the annotation, but -- this can be omitted and the annotation will then be computed from the `cat` property. The `cat` property is either -- a string, a list of strings or a function (of two arguments, `base` and `stems` as above) returning a string or -- list of strings. The string can contain the keywords GENDER to substitute the gender (and animacy for masculine -- nouns) and POS (to substitute the pluralized part of speech). The keyword GENPOS is equivalent to 'GENDER POS'. If -- no keyword is present, ' GENPOS' is added onto the end. If only GENDER is present, ' POS' is added onto the end. -- In all cases, the language name is added onto the beginning to form the full category name. -- * The `desc` property is of the same form as the `cat` property and specifies the annotation to display in the title -- bar (which may have the same format as the category minus the part of speech, or may be abbreviated). The value -- may not be a list of strings, as only one annotation is displayed. If omitted, it is derived from the category -- spec(s) by taking the last category (if more than one is given) and removing ' POS' before keyword substitution. 
local declprops = {}

-- NOTE(review): in the masculine declensions below, `gen_s` tests `base.animacy == "in"`. The indicator parser and
-- the default-setting code in this file only ever produce the animacy values "wos", "zwj", "wěc" and "none", so this
-- comparison never matches and `gen_s` is always "a". Confirm whether "wěc" was intended before changing it, since
-- that would add a second genitive singular form to the generated tables.

-- Masculine hard-stem declension. The second add_decl() call adds the voc_s and loc_s ending "e" on the
-- palatalized oblique vowel stem.
decls["hard-m"] = function(base, stems)
	local gen_s = base.animacy == "in" and {"a", "u"} or "a"
	local nom_p = base.animacy == "wos" and "ojo" or "y"
	-- `voc_s` is false (no ending passed) when the lemma ends in -tr.
	local voc_s = not rmatch(base.lemma, ".*tr$") and "o"
	-- acc_s and acc_p are nil here; they are derived later in handle_derived_slots_and_overrides().
	add_decl(base, stems, gen_s, "ej", nil, voc_s, "u", "om",
		"aj", "ow", "omaj",
		nom_p, "ow", "am", nil, "ach", "ami")
	add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)),
		nil, nil, nil, "e", "e")
end

declprops["hard-m"] = {
	desc = function(base, stems)
		return "muski twjerdy zdónk"
	end,
	cat = function(base, stems)
		return "muski twjerdy zdónk"
	end
}

-- Masculine soft-stem declension.
decls["soft-m"] = function(base, stems)
	local gen_s = base.animacy == "in" and {"a", "u"} or "a"
	local nom_p = base.animacy == "wos" and "ojo" or "e"
	add_decl(base, com.addj(stems.oblique_vowel_stem), gen_s, "ej", nil, "o", "u", "om",
		"ej", "ow", "omaj",
		nom_p, "ow", "am", nil, "ach", "emi")
end

declprops["soft-m"] = {
	desc = function(base, stems)
		return "muski mjechki zdónk"
	end,
	cat = function(base, stems)
		return "muski mjechki zdónk"
	end
}

-- Masculine declension for the "czs" (hard sibilant) type.
decls["czs-m"] = function(base, stems)
	local gen_s = base.animacy == "in" and {"a", "u"} or "a"
	local nom_p = base.animacy == "wos" and "ojo" or "y"
	add_decl(base, stems, gen_s, "ej", nil, "o", "u", "om",
		"aj", "ow", "omaj",
		nom_p, "ow", "am", nil, "ach", "ami")
end

declprops["czs-m"] = {
	desc = function(base, stems)
		return "muski twjerdy syčacy zdónk"
	end,
	cat = function(base, stems)
		return "muski twjerdy zdónk"
	end
}

-- Masculine velar-stem declension. The second add_decl() call adds the loc_s ending "e" on the palatalized
-- oblique vowel stem.
decls["velar-m"] = function(base, stems)
	local gen_s = base.animacy == "in" and {"a", "u"} or "a"
	local nom_p = base.animacy == "wos" and "ojo" or "i"
	add_decl(base, stems, gen_s, "ej", nil, "o", "u", "om",
		"aj", "ow", "omaj",
		nom_p, "ow", "am", nil, "ach", "ami")
	add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)),
		nil, nil, nil, nil, "e")
end

declprops["velar-m"] = {
	desc = function(base, stems)
		return "muski welarny zdónk"
	end,
	cat = function(base, stems)
		return "muski welarny zdónk"
	end
}

-- Masculine adjectival declension. The branch is chosen from the shape of the lemma ending. In the first and
-- last branches the nominative plural is added by a separate add_decl() call (on the palatalized stem for
-- animacy "wos"), so the main call passes nil for nom_p.
decls["adje-m"] = function(base, stems)
	if rmatch(base.lemma, "^.*" .. com.velar_c .. "i$") then
		add_decl(base, stems, "eho", "emu", nil, "-", "im", "im",
			"aj", "eju", "imaj",
			nil, "ich", "im", nil, "ich", "imi")
		if base.animacy == "wos" then
			add_decl(base, com.apply_palatalization(stems.oblique_vowel_stem),
				nil, nil, nil, nil, nil, nil, nil, nil, nil, "y")
		else
			add_decl(base, stems, nil, nil, nil, nil, nil, nil, nil, nil, nil, "e")
		end
	elseif rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "i$") then
		local nom_p = base.animacy == "wos" and "i" or "e"
		add_decl(base, stems, "eho", "emu", nil, "-", "im", "im",
			"ej", "eju", "imaj",
			nom_p, "ich", "im", nil, "ich", "imi")
	elseif rmatch(base.lemma, "^.*[czs]e$") then
		local nom_p = base.animacy == "wos" and "y" or "e"
		add_decl(base, stems, "eho", "emu", nil, "-", "ym", "ym",
			"aj", "eju", "ymaj",
			nom_p, "ych", "ym", nil, "ych", "ymi")
	else
		add_decl(base, stems, "eho", "emu", nil, "-", "ym", "ym",
			"aj", "eju", "ymaj",
			nil, "ych", "ym", nil, "ych", "ymi")
		if base.animacy == "wos" then
			add_decl(base, com.apply_palatalization(stems.oblique_vowel_stem),
				nil, nil, nil, nil, nil, nil, nil, nil, nil, "i")
		else
			add_decl(base, stems, nil, nil, nil, nil, nil, nil, nil, nil, nil, "e")
		end
	end
end

declprops["adje-m"] = {
	desc = function(base, stems)
		return ""
	end,
	cat = function(base, stems)
		return "masculine adjectival"
	end
}

-- Feminine hard-stem declension. The second add_decl() call adds dat_s, loc_s and nom_d "e" on the palatalized
-- oblique vowel stem.
decls["hard-f"] = function(base, stems)
	add_decl(base, stems, "y", nil, "u", "-", nil, "u",
		nil, "ow", "omaj",
		"y", "ow", "am", "y", "ach", "ami")
	add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)),
		nil, "e", nil, nil, "e", nil, "e")
end

declprops["hard-f"] = {
	desc = function(base, stems)
		return "žónski twjerdy zdónk"
	end,
	cat = function(base, stems)
		return "žónski twjerdy zdónk"
	end
}

-- Feminine soft-stem declension. Consonant-final lemmas get acc_s "-" (same as the lemma) instead of "u".
decls["soft-f"] = function(base, stems)
	if rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$") then
		add_decl(base, com.addj(stems.oblique_vowel_stem), "e", nil, "-", "-", nil, "u",
			nil, "ow", "omaj",
			"e", "ow", "am", "e", "ach", "emi")
	else
		add_decl(base, stems, "e", nil, "u", "-", nil, "u",
			nil, "ow", "omaj",
			"e", "ow", "am", "e", "ach", "emi")
	end
	add_decl(base, com.removej(com.addj(stems.oblique_vowel_stem)),
		nil, "i", nil, nil, "i", nil, "i", nil, nil, nil, "i")
end

declprops["soft-f"] = {
	desc = function(base, stems)
		return "žónski mjechki zdónk"
	end,
	cat = function(base, stems)
		return "žónski mjechki zdónk"
	end
}

-- Feminine declension for the "czs" (hard sibilant) type. Consonant-final lemmas get acc_s "-" instead of "u".
decls["czs-f"] = function(base, stems)
	if rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$") then
		add_decl(base, stems, "y", "y", "-", "-", "y", "u",
			"y", "ow", "omaj",
			"y", "ow", "am", "y", "ach", "ami")
	else
		add_decl(base, stems, "y", "y", "u", "-", "y", "u",
			"y", "ow", "omaj",
			"y", "ow", "am", "y", "ach", "ami")
	end
end

declprops["czs-f"] = {
	desc = function(base, stems)
		return "žónski twjerdy syčacy zdónk"
	end,
	cat = function(base, stems)
		return "žónski twjerdy zdónk"
	end
}

-- Feminine velar-stem declension. The second add_decl() call adds dat_s, loc_s and nom_d "e" on the palatalized
-- oblique vowel stem.
decls["velar-f"] = function(base, stems)
	add_decl(base, stems, "i", nil, "u", "-", nil, "u",
		nil, "ow", "omaj",
		"i", "ow", "am", "i", "ach", "ami")
	add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)),
		nil, "e", nil, nil, "e", nil, "e")
end

declprops["velar-f"] = {
	desc = function(base, stems)
		return "žónski welarny zdónk"
	end,
	cat = function(base, stems)
		return "žónski welarny zdónk"
	end
}

-- Feminine v-stem declension.
decls["v-f"] = function(base, stems)
	add_decl(base, stems, "wje", "wi", "-", "-", "wi", "wju",
		"wi", "wjow", "wjomaj",
		"wje", "wjow", "wjam", "wje", "wjach", "wjemi")
end

declprops["v-f"] = {
	desc = function(base, stems)
		return "feminine v-stem"
	end,
	cat = function(base, stems)
		return "feminine v-stem"
	end
}

-- Feminine adjectival declension; lemmas in velar or inherently soft consonant + "a" take the i-series
-- dual/plural endings, all others the y-series.
decls["adje-f"] = function(base, stems)
	if rmatch(base.lemma, "^.*" .. com.velar_c .. "a$") or
		rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "a$") then
		add_decl(base, stems, "eje", "ej", "u", "-", "ej", "ej",
			"ej", "eju", "imaj",
			"e", "ich", "im", "e", "ich", "imi")
	else
		add_decl(base, stems, "eje", "ej", "u", "-", "ej", "ej",
			"ej", "eju", "ymaj",
			"e", "ych", "ym", "e", "ych", "ymi")
	end
end

declprops["adje-f"] = {
	desc = function(base, stems)
		return ""
	end,
	cat = function(base, stems)
		return "feminine adjectival"
	end
}

-- Neuter hard-stem declension. The second add_decl() call adds loc_s and nom_d "e" on the palatalized oblique
-- vowel stem.
decls["hard-n"] = function(base, stems)
	add_decl(base, stems, "a", "u", "-", "-", nil, "om",
		nil, "ow", "omaj",
		"a", "ow", "am", "a", "ach", "ami")
	add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)),
		nil, nil, nil, nil, "e", nil, "e")
end

declprops["hard-n"] = {
	desc = function(base, stems)
		return "ničeji twjerdy zdónk"
	end,
	cat = function(base, stems)
		return "ničeji twjerdy zdónk"
	end
}

-- Neuter soft-stem declension. The second add_decl() call adds nom_d "i" on the stem returned by com.removej().
decls["soft-n"] = function(base, stems)
	add_decl(base, stems, "a", "u", "-", "-", "u", "om",
		nil, "ow", "omaj",
		"a", "ow", "am", "a", "ach", "emi")
	add_decl(base, com.removej(stems.oblique_vowel_stem), nil, nil, nil, nil, nil, nil, "i")
end

declprops["soft-n"] = {
	desc = function(base, stems)
		return "ničeji mjechki zdónk"
	end,
	cat = function(base, stems)
		return "ničeji mjechki zdónk"
	end
}

-- Neuter declension for the "czs" (hard sibilant) type.
decls["czs-n"] = function(base, stems)
	add_decl(base, stems, "a", "u", "-", "-", "u", "om",
		"y", "ow", "omaj",
		"a", "ow", "am", "a", "ach", "ami")
end

declprops["czs-n"] = {
	desc = function(base, stems)
		return "ničeji twjerdy syčacy zdónk"
	end,
	cat = function(base, stems)
		return "ničeji twjerdy zdónk"
	end
}

-- Neuter velar-stem declension. The second add_decl() call adds loc_s and nom_d "e" on the palatalized oblique
-- vowel stem (in addition to the loc_s "u" added by the first call).
decls["velar-n"] = function(base, stems)
	add_decl(base, stems, "a", "u", "-", "-", "u", "om",
		nil, "ow", "omaj",
		"a", "ow", "am", "a", "ach", "ami")
	add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)),
		nil, nil, nil, nil, "e", nil, "e")
end

declprops["velar-n"] = {
	desc = function(base, stems)
		return "ničeji welarny zdónk"
	end,
	cat = function(base, stems)
		-- Previously truncated to "ničeji welarny zd"; now matches `desc` and the sibling declension types.
		return "ničeji welarny zdónk"
	end
}

-- Neuter adjectival declension; lemmas in velar or inherently soft consonant + "e" take the i-series endings,
-- all others the y-series. Both branches use the same gen_s/dat_s/acc_s endings ("eho", "emu", "-"); the
-- y-series branch previously carried the feminine endings ("eje", "ej", "u") copied from adje-f.
decls["adje-n"] = function(base, stems)
	if rmatch(base.lemma, "^.*" .. com.velar_c .. "e$") or
		rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "e$") then
		add_decl(base, stems, "eho", "emu", "-", "-", "im", "im",
			"ej", "eju", "imaj",
			"e", "ich", "im", "e", "ich", "imi")
	else
		add_decl(base, stems, "eho", "emu", "-", "-", "ym", "ym",
			"ej", "eju", "ymaj",
			"e", "ych", "ym", "e", "ych", "ymi")
	end
end

declprops["adje-n"] = {
	desc = function(base, stems)
		return ""
	end,
	cat = function(base, stems)
		return "neuter adjectival"
	end
}

-- Neuter t-stem declension.
decls["tstem-n"] = function(base, stems)
	add_decl(base, stems, "eća", "eću", "-", "-", "eću", "ećom",
		"eći", "ećow", "ećomaj",
		"ata", "atow", "atam", "ata", "atach", "atami")
end

declprops["tstem-n"] = {
	desc = function(base, stems)
		return "neuter t-stem"
	end,
	cat = function(base, stems)
		return "neuter t-stem"
	end
}

-- Neuter n-stem declension.
decls["nstem-n"] = function(base, stems)
	add_decl(base, stems, "enja", "enju", "-", "-", "enju", "enjom",
		"eni", "enjow", "enjomaj",
		"enja", "enjow", "enjam", "enja", "enjach", "enjami")
end

declprops["nstem-n"] = {
	desc = function(base, stems)
		return "neuter n-stem"
	end,
	cat = function(base, stems)
		return "neuter n-stem"
	end
}

-- Adjectivally-declined noun: generate the adjective forms with the adjective module and copy the slots that
-- correspond to the noun's gender and number into `base.forms`.
decls["adj"] = function(base, stems)
	-- `props` is always empty here, so `propspec` is always the empty string.
	local props = {}
	local propspec = table.concat(props, ".")
	if propspec ~= "" then
		propspec = "<" .. propspec .. ">"
	end
	local adj_alternant_multiword_spec =
		require("Modul:zlw-ocs-adjective").do_generate_forms({base.lemma .. propspec})
	local function copy(from_slot, to_slot)
		base.forms[to_slot] = adj_alternant_multiword_spec.forms[from_slot]
	end
	if base.number ~= "mn" then
		if base.gender == "m" then
			copy("nom_m", "nom_s")
			copy("gen_mn", "gen_s")
			copy("dat_mn", "dat_s")
			copy("loc_mn", "loc_s")
			copy("ins_mn", "ins_s")
		elseif base.gender == "ž" then
			copy("nom_f", "nom_s")
			copy("gen_f", "gen_s")
			copy("dat_f", "dat_s")
			copy("acc_f", "acc_s")
			copy("loc_f", "loc_s")
			copy("ins_f", "ins_s")
		else
			copy("nom_n", "nom_s")
			copy("gen_mn", "gen_s")
			copy("dat_mn", "dat_s")
			copy("acc_n", "acc_s")
			copy("loc_mn", "loc_s")
			copy("ins_mn", "ins_s")
		end
		if not base.forms.voc_s then
			iut.insert_forms(base.forms, "voc_s", base.forms.nom_s)
		end
	end
	if base.number ~= "jed" then
		if base.gender == "m" then
			copy("nom_mp", "nom_p")
			copy("acc_mfp", "acc_p")
			copy("nom_md", "nom_d")
		elseif base.gender == "ž" then
			copy("nom_fp", "nom_p")
			copy("acc_mfp", "acc_p")
			copy("nom_fnd", "nom_d")
		else
			copy("nom_np", "nom_p")
			copy("acc_np", "acc_p")
			copy("nom_fnd", "nom_d")
		end
		copy("gen_p", "gen_p")
		copy("dat_p", "dat_p")
		copy("ins_p", "ins_p")
		copy("loc_p", "loc_p")
		copy("gen_d", "gen_d")
		copy("dat_d", "dat_d")
	end
end

-- Classify an adjectival lemma by its final letter: "hard" (-ý), "soft" (-í) or otherwise "possessive".
local function get_stemtype(base)
	if rfind(base.lemma, "ý$") then
		return "hard"
	elseif rfind(base.lemma, "í$") then
		return "soft"
	else
		return "possessive"
	end
end

declprops["adj"] = {
	cat = function(base, stems)
		return {"adjectival POS", get_stemtype(base) .. " GENDER adjectival POS"}
	end,
}

decls["indecl"] = function(base, stems)
	-- Indeclinable. Note that fully indeclinable nouns should not have a table at all rather than one all of whose forms
	-- are the same; but having an indeclinable declension is useful for nouns that may or may not be indeclinable, e.g.
	-- [[desatero]] "group of ten" or the plural of [[peso]], which may be indeclinable 'pesos'.
	--
	-- One "-" per ending slot of add_decl(): six singular, three dual, six plural. (Only twelve were passed
	-- before, which left acc_p, loc_p and ins_p without an ending.)
	add_decl(base, stems, "-", "-", "-", "-", "-", "-",
		"-", "-", "-",
		"-", "-", "-", "-", "-", "-")
end

declprops["indecl"] = {
	cat = function(base, stems)
		if base.adj then
			return {"adjectival POS", "indeclinable adjectival POS", "indeclinable GENDER adjectival POS"}
		else
			return {"indeclinable POS", "indeclinable GENPOS"}
		end
	end
}

decls["manual"] = function(base, stems)
	-- Anything declined manually using overrides. We don't set any declensions except the nom_s (or nom_p if plurale
	-- tantum).
	add(base, base.number == "mn" and "nom_p" or "nom_s", stems, "-")
end

declprops["manual"] = {
	desc = "GENDER",
	cat = {},
}

-- Set gender/number/animacy for a pronoun from its lemma. Throws an error if the user specified any of them
-- (number is tolerated only for the lemma "ona") or if the lemma is not a recognized pronoun.
local function set_pron_defaults(base)
	if base.gender or base.lemma ~= "ona" and base.number or base.animacy then
		error("Can't specify gender, number or animacy for pronouns")
	end

	local function pron_props()
		-- Return values are GENDER, NUMBER, ANIMACY, HAS_CLITIC.
		if base.lemma == "štó" then
			return "none", "jed", "wos", false
		elseif base.lemma == "što" then
			return "none", "jed", "wěc", false
		else
			error(("Unrecognized pronoun '%s'"):format(base.lemma))
		end
	end

	local gender, number, animacy, has_clitic = pron_props()
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end

-- Pronouns use a single stem set with empty stems; the endings in decls["pron"] are the full forms.
local function determine_pronoun_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with pronouns")
	end
	base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
	base.decl = "pron"
end

decls["pron"] = function(base, stems)
	if base.lemma == "štó" then
		add_decl(base, stems, "koho", "komu", nil, nil, "kim", "kim")
	elseif base.lemma == "što" then
		add_decl(base, stems, "čeho", "čemu", nil, nil, "čim", "čim")
	else
		error(("Internal error: Unrecognized pronoun lemma '%s'"):format(base.lemma))
	end
end

declprops["pron"] = {
	desc = "GENDER pronoun",
	cat = {},
}

-- Set gender/number/animacy for a numeral (no gender, number "mn", no animacy). Throws an error if the user
-- specified gender or animacy.
local function set_num_defaults(base)
	if base.gender or base.animacy then
		error("Can't specify gender, number or animacy for numeral")
	end

	local function num_props()
		-- Return values are GENDER, NUMBER, ANIMACY, HAS_CLITIC.
		return "none", "mn", "none", false
	end

	local gender, number, animacy, has_clitic = num_props()
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end

-- Set gender/number/animacy for a determiner (all "none"). Throws an error if the user specified any of them.
local function set_det_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for determiner")
	end

	local function det_props()
		-- Return values are GENDER, NUMBER, ANIMACY, HAS_CLITIC.
		return "none", "none", "none", false
	end

	local gender, number, animacy, has_clitic = det_props()
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end

-- Determiners use a single stem set: the lemma minus a final vowel, or the whole lemma if it doesn't end in one.
local function determine_determiner_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with determiners")
	end
	local stem = rmatch(base.lemma, "^(.*)" .. com.vowel_c .. "$") or base.lemma
	base.stem_sets = {{reducible = false, vowel_stem = stem, nonvowel_stem = stem}}
	base.decl = "det"
end

decls["det"] = function(base, stems)
	add_sg_decl(base, stems, "a", "a", "-", nil, "a", "a")
end

declprops["det"] = {
	desc = "GENDER determiner",
	cat = {},
}

-- Collect the footnotes from an alternating run: the even-indexed elements are the bracketed footnotes and the
-- odd-indexed elements after them must be empty. Returns a list of footnotes, or nil if there are none.
local function fetch_footnotes(separated_group)
	local footnotes
	for j = 2, #separated_group - 1, 2 do
		if separated_group[j + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		if not footnotes then
			footnotes = {}
		end
		table.insert(footnotes, separated_group[j])
	end
	return footnotes
end

-- Parse a slot override such as 'gena:u' or 'nompl+accpl:FORM'. `segments` is an alternating run whose first
-- element begins with one or more '+'-joined case specs (a three-character case name optionally followed by
-- 'pl' or 'du'). A ':' directly after the case specs marks the override as a full form rather than an ending.
-- Returns two values: the list of slot names and an object {full = BOOLEAN_OR_NIL, values = {{form = ...,
-- footnotes = ...}, ...}}. Note that segments[1] is modified in place.
local function parse_override(segments)
	local retval = {values = {}}
	local part = segments[1]
	local slots = {}
	while true do
		local case = usub(part, 1, 3)
		if cases[case] then
			-- ok
		else
			error(("Unrecognized case '%s' in override: '%s'"):format(case, table.concat(segments)))
		end
		part = usub(part, 4)
		local slot
		if rfind(part, "^pl") then
			part = usub(part, 3)
			slot = case .. "_p"
		elseif rfind(part, "^du") then
			part = usub(part, 3)
			slot = case .. "_d"
		else
			slot = case .. "_s"
		end
		table.insert(slots, slot)
		if rfind(part, "^%+") then
			part = usub(part, 2)
		else
			break
		end
	end
	if rfind(part, "^:") then
		retval.full = true
		part = usub(part, 2)
	end
	segments[1] = part
	local colon_separated_groups = iut.split_alternating_runs_and_strip_spaces(segments, ":")
	for i, colon_separated_group in ipairs(colon_separated_groups) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			error(("Use - to indicate an empty ending for slot%s '%s': '%s'"):format(
				#slots > 1 and "s" or "", table.concat(slots), table.concat(segments)))
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes(colon_separated_group)
		table.insert(retval.values, value)
	end
	return slots, retval
end

--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more
dot-separated indicators within them). Return value is an object of the form

{
  overrides = {
	SLOT = {OVERRIDE, OVERRIDE, ...}, -- as returned by parse_override()
	...
  },
  forms = {}, -- forms for a single spec alternant; see `forms` below
  footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
  stems = { -- may be missing
	{
	  reducible = TRUE_OR_FALSE,
	  footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
	  -- The following fields are filled in by determine_stems()
	  vowel_stem = "STEM",
	  nonvowel_stem = "STEM",
	  oblique_slots = "all",
	  oblique_vowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
	  oblique_nonvowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
	},
	...
  },
  gender = "GENDER", -- "m", "ž", "n"
  number = "NUMBER", -- "jed", "dw", "mn"; may be missing
  animacy = "ANIMACY", -- "wos", "zwj", "wěc"; may be missing
  hard = true, -- may be missing
  soft = true, -- may be missing
  mixed = true, -- may be missing
  surname = true, -- may be missing
  istem = true, -- may be missing
  ["-istem"] = true, -- may be missing
  tstem = true, -- may be missing
  nstem = true, -- may be missing
  tech = true, -- may be missing
  foreign = true, -- may be missing
  mostlyindecl = true, -- may be missing
  indecl = true, -- may be missing
  manual = true, -- may be missing
  adj = true, -- may be missing
  decllemma = "DECLENSION-LEMMA", -- may be missing
  declgender = "DECLENSION-GENDER", -- may be missing
  declnumber = "DECLENSION-NUMBER", -- may be missing

  -- The following additional fields are added by other functions:
  orig_lemma = "ORIGINAL-LEMMA", -- as given by the user
  orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
  lemma = "LEMMA", -- `orig_lemma_no_links`, converted to singular form if plural and lowercase if all-uppercase
  forms = {
	SLOT = {
	  {
		form = "FORM",
		footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
	  },
	  ...
	},
	...
  },
  decl = "DECL", -- declension, e.g. "hard-m"
  vowel_stem = "VOWEL-STEM", -- derived from vowel-ending lemmas
  nonvowel_stem = "NONVOWEL-STEM", -- derived from non-vowel-ending lemmas
}
]=]
local function parse_indicator_spec(angle_bracket_spec)
	local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
	assert(inside)
	local base = {overrides = {}, forms = {}}
	if inside ~= "" then
		local segments = iut.parse_balanced_segment_run(inside, "[", "]")
		local dot_separated_groups = iut.split_alternating_runs_and_strip_spaces(segments, "%.")
		for i, dot_separated_group in ipairs(dot_separated_groups) do
			local part = dot_separated_group[1]
			local case_prefix = usub(part, 1, 3)
			if cases[case_prefix] then
				-- Slot override, possibly for several slots at once.
				local slots, override = parse_override(dot_separated_group)
				for _, slot in ipairs(slots) do
					if base.overrides[slot] then
						error(("Two overrides specified for slot '%s'"):format(slot))
					else
						base.overrides[slot] = {override}
					end
				end
			elseif part == "" then
				-- Footnotes by themselves.
				if #dot_separated_group == 1 then
					error("Blank indicator: '" .. inside .. "'")
				end
				base.footnotes = fetch_footnotes(dot_separated_group)
			elseif rfind(part, "^[-*#ě]*$") or rfind(part, "^[-*#ě]*,") then
				-- Reducible / vowel-alternation spec, possibly several comma-separated alternatives.
				if base.stem_sets then
					error("Can't specify reducible/vowel-alternant indicator twice: '" .. inside .. "'")
				end
				local comma_separated_groups =
					iut.split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
				local stem_sets = {}
				for i, comma_separated_group in ipairs(comma_separated_groups) do
					local pattern = comma_separated_group[1]
					local reducible, vowelalt, oblique_slots
					if pattern == "-" then
						-- default reducible, no vowel alt
					else
						local before, after
						before, reducible, after = rmatch(pattern, "^(.-)(%-?%*)(.-)$")
						if before then
							pattern = before .. after
							-- "*" means reducible, "-*" means explicitly not reducible.
							reducible = reducible == "*"
						end
						if pattern ~= "" then
							if not rfind(pattern, "^##?ě?$") then
								error("Unrecognized vowel-alternation pattern '" .. pattern ..
									"', should be one of #, ##, #ě or ##ě: '" .. inside .. "'")
							end
							if pattern == "#ě" or pattern == "##ě" then
								vowelalt = "quant-ě"
							else
								vowelalt = "quant"
							end
							-- `oblique_slots` will be later changed to "all" if the lemma ends in a consonant.
							oblique_slots = "all"
						end
					end
					table.insert(stem_sets, {
						reducible = reducible,
						vowelalt = vowelalt,
						oblique_slots = oblique_slots,
						footnotes = fetch_footnotes(comma_separated_group)
					})
				end
				base.stem_sets = stem_sets
			elseif #dot_separated_group > 1 then
				error("Footnotes only allowed with slot overrides, reducible or vowel alternation specs or by themselves: '" ..
					table.concat(dot_separated_group) .. "'")
			elseif part == "m" or part == "ž" or part == "n" then
				if base.gender then
					error("Can't specify gender twice: '" .. inside .. "'")
				end
				base.gender = part
			elseif part == "jed" or part == "dw" or part == "mn" then
				if base.number then
					error("Can't specify number twice: '" .. inside .. "'")
				end
				base.number = part
			elseif part == "wos" or part == "zwj" or part == "wěc" then
				if base.animacy then
					error("Can't specify animacy twice: '" .. inside .. "'")
				end
				base.animacy = part
			elseif part == "hard" or part == "soft" or part == "istem" or part == "tstem" or part == "nstem" or
				part == "indecl" or part == "pron" or part == "det" or part == "velar" or part == "vstem" or
				part == "adje" then
				if base[part] then
					error("Can't specify '" .. part .. "' twice: '" .. inside .. "'")
				end
				base[part] = true
			elseif part == "+" then
				if base.adj then
					error("Can't specify '+' twice: '" .. inside .. "'")
				end
				base.adj = true
			elseif part == "!" then
				if base.manual then
					error("Can't specify '!' twice: '" .. inside .. "'")
				end
				base.manual = true
			elseif rfind(part, "^mixedistem:") then
				if base.mixedistem then
					error("Can't specify 'mixedistem:' twice: '" .. inside .. "'")
				end
				base.mixedistem = rsub(part, "^mixedistem:", "")
			elseif rfind(part, "^decllemma:") then
				if base.decllemma then
					error("Can't specify 'decllemma:' twice: '" .. inside .. "'")
				end
				base.decllemma = rsub(part, "^decllemma:", "")
			elseif rfind(part, "^declgender:") then
				if base.declgender then
					error("Can't specify 'declgender:' twice: '" .. inside .. "'")
				end
				base.declgender = rsub(part, "^declgender:", "")
			elseif rfind(part, "^declnumber:") then
				if base.declnumber then
					error("Can't specify 'declnumber:' twice: '" .. inside .. "'")
				end
				base.declnumber = rsub(part, "^declnumber:", "")
			else
				error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
			end
		end
	end
	return base
end

-- True if `base` is not flagged as adjectival, pronoun, determiner or numeral.
local function is_regular_noun(base)
	return not base.adj and not base.pron and not base.det and not base.num
end

-- Remember the user-specified number in `actual_number` and, if 'declnumber:' was given, decline using that
-- number instead.
local function process_declnumber(base)
	base.actual_number = base.number
	if base.declnumber then
		if base.declnumber == "jed" or base.declnumber == "dw" or base.declnumber == "mn" then
			base.number = base.declnumber
		else
			-- The message lists the values actually accepted above.
			error(("Unrecognized value '%s' for 'declnumber', should be 'jed', 'dw' or 'mn'"):format(
				base.declnumber))
		end
	end
end

-- Fill in default gender/number/animacy for `base` and throw an error on incompatible indicator combinations.
local function set_defaults_and_check_bad_indicators(base)
	-- Set default values.
	local regular_noun = is_regular_noun(base)
	if base.pron then
		set_pron_defaults(base)
	elseif base.det then
		set_det_defaults(base)
	elseif base.num then
		set_num_defaults(base)
	elseif not base.adj then
		if not base.gender then
			if base.manual then
				base.gender = "none"
			else
				error("For nouns, gender must be specified")
			end
		end
		base.number = base.number or "allthree"
		process_declnumber(base)
		base.animacy = base.animacy or "wěc"
		base.actual_gender = base.gender
		base.actual_animacy = base.animacy
		if base.declgender then
			if base.declgender == "m-an" then
				base.gender = "m"
				base.animacy = "wos"
			elseif base.declgender == "m-in" then
				base.gender = "m"
				base.animacy = "wěc"
			elseif base.declgender == "ž" or base.declgender == "n" then
				base.gender = base.declgender
			else
				-- The message lists the values actually accepted above.
				error(("Unrecognized value '%s' for 'declgender', should be 'm-an', 'm-in', 'ž' or 'n'"):format(
					base.declgender))
			end
		end
	end

	-- Check for bad indicator combinations.
	if (base.hard and 1 or 0) + (base.soft and 1 or 0) > 1 then
		error("At most one of 'hard' or 'soft' can be specified")
	end
	if base.istem and base["-istem"] then
		error("'istem' and '-istem' cannot be specified together")
	end
	if (base.istem or base["-istem"]) then
		if not regular_noun then
			error("'istem' and '-istem' can only be specified with regular nouns")
		end
	end
	if base.declgender and not regular_noun then
		error("'declgender' can only be specified with regular nouns")
	end
end

-- Run set_defaults_and_check_bad_indicators() on every word spec and record on the top-level spec which kinds
-- of words (pronoun, determiner, numeral, or not) were seen.
local function set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
	local is_multiword = #alternant_multiword_spec.alternant_or_word_specs > 1
	iut.map_word_specs(alternant_multiword_spec, function(base)
		set_defaults_and_check_bad_indicators(base)
		base.multiword = is_multiword -- FIXME: not currently used; consider deleting
		alternant_multiword_spec.has_clitic = alternant_multiword_spec.has_clitic or base.has_clitic
		if base.pron then
			alternant_multiword_spec.saw_pron = true
		else
			alternant_multiword_spec.saw_non_pron = true
		end
		if base.det then
			alternant_multiword_spec.saw_det = true
		else
			alternant_multiword_spec.saw_non_det = true
		end
		if base.num then
			alternant_multiword_spec.saw_num = true
		else
			alternant_multiword_spec.saw_non_num = true
		end
	end)
end

-- Replace a word-final palatalized consonant (cluster) with its presumed unpalatalized source; the first
-- matching replacement wins, and `word` is returned unchanged if none matches. The 'št' and 'čt' replacements
-- are only tried when `is_adjective` is truthy.
local function undo_second_palatalization(base, word, is_adjective)
	local function try(from, to)
		local stem = rmatch(word, "^(.*)" .. from .. "$")
		if stem then
			return stem .. to
		end
		return nil
	end
	return is_adjective and try("št", "sk") or
		is_adjective and try("čt", "ck") or
		try("c", "k") or -- FIXME, this could be wrong and c correct
		try("ř", "r") or
		try("z", "h") or -- FIXME, this could be wrong and z or g correct
		try("š", "ch") or
		word
end

-- For a plural-only lemma, synthesize a likely singular lemma. It doesn't have to be
-- theoretically correct as long as it generates all the correct plural forms.
local function synthesize_singular_lemma(base)
	-- Reconstructs a singular lemma from the plural-only lemma in `base.lemma` and stores it back into
	-- `base.lemma`. Creates a default (empty) stem set if none was specified, and may set `reducible` on a stem
	-- set and `base.istem` as a side effect. Throws an error if the lemma ending doesn't fit the gender or if
	-- different stem sets would yield different singular lemmas.
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	local lemma_determined
	-- Loop over all stem sets in case the user specified multiple ones (e.g. '*,-*'). If we try to reconstruct
	-- different lemmas for different stem sets, we'll throw an error below.
	for _, stems in ipairs(base.stem_sets) do
		local stem, lemma
		-- This loop runs at most once; it exists only so that `break` can be used to leave the chain early.
		while true do
			if base.indecl then
				-- If specified as indeclinable, leave it alone; e.g. 'pesos' indeclinable plural of [[peso]].
				lemma = base.lemma
				break
			elseif base.gender == "m" then
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					if base.soft then
						-- [[Blíženci]] "Gemini"
						-- Since the nominative singular has no ending.
						-- The second argument used to be an undeclared global `ending` (i.e. nil); pass nil
						-- explicitly so no global is read.
						lemma = com.convert_paired_plain_to_palatal(stem, nil)
					else
						lemma = undo_second_palatalization(base, stem)
					end
				else
					stem = rmatch(base.lemma, "^(.*)ové$") or rmatch(base.lemma, "^(.*)[éyě]$") or
						rmatch(base.lemma, "^(.*)ie$")
					if stem then
						-- [[manželé]] "married couple", [[Velšané]] "Welsh people"
						lemma = stem
					else
						error(("Masculine plural-only lemma '%s' should end in -i, -ové or -é"):format(base.lemma))
					end
				end
				if stems.reducible == nil then
					if rfind(lemma, com.cons_c .. "[ck]$") and not com.is_monosyllabic(base.lemma) then
						stems.reducible = true
					end
					if stems.reducible then
						lemma = dereduce(base, lemma)
					end
				end
				break
			elseif base.gender == "ž" then
				stem = rmatch(base.lemma, "^(.*)y$")
				if stem then
					lemma = stem .. "a"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eě]$")
				if stem then
					-- Singular like the plural. Cons-stem feminines like [[dlaň]] "palm (of the hand)" have identical
					-- plurals to soft-stem feminines like [[růže]] (modulo e/ě differences), so we don't need to
					-- reconstruct the former type.
					lemma = base.lemma
					break
				end
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					-- i-stems.
					lemma = stem
					base.istem = true
					break
				end
				error(("Feminine plural-only lemma '%s' should end in -y, -ě, -e or -i"):format(base.lemma))
			elseif base.gender == "n" then
				-- -ata nouns like [[slůně]] "baby elephant" nom pl 'slůňata' are declined in the plural same as if
				-- the singular were 'slůňato' so we don't have to worry about them.
				stem = rmatch(base.lemma, "^(.*)a$")
				if stem then
					lemma = stem .. "o"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eěí]$")
				if stem then
					-- singular lemma also in -e, -ě or -í; e.g. [[věčná loviště]] "[[happy hunting ground]]"
					lemma = base.lemma
					break
				end
				error(("Neuter plural-only lemma '%s' should end in -a, -í, -ě or -e"):format(base.lemma))
			else
				error(("Internal error: Unrecognized gender '%s'"):format(base.gender))
			end
		end
		if lemma_determined and lemma_determined ~= lemma then
			error(("Attempt to set two different singular lemmas '%s' and '%s'"):format(lemma_determined, lemma))
		end
		lemma_determined = lemma
	end
	base.lemma = lemma_determined
end

-- For an adjectival lemma, synthesize the masc singular form.
local function synthesize_adj_lemma(base)
	local stem
	if base.indecl then
		base.decl = "indecl"
		stem = base.lemma
	else
		local gender, number
		local function sub_ov(stem)
			stem = stem:gsub("ov$", "ův")
			return stem
		end
		while true do
			if base.number == "mn" then
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						if base.soft then
							-- nothing to do
						else
							if base.animacy ~= "wos" then
								error(("Masculine plural-only adjectival lemma '%s' ending in -í can only be animate unless '.soft' is specified"):
									format(base.lemma))
							end
							base.lemma = undo_second_palatalization(base, stem, "is adjective") .. "ý"
						end
						break
					end
					stem = rmatch(base.lemma, "^(.*)é$")
					if stem then
						if base.animacy == "wos" then
							error(("Masculine plural-only adjectival lemma '%s' ending in -é must be inanimate"):
								format(base.lemma))
						end
						base.lemma = stem ..
"ý" break end stem = rmatch(base.lemma, "^(.*ov)i$") or rmatch(base.lemma, "^(.*in)i$") if stem then if base.animacy ~= "wos" then error(("Masculine plural-only possessive adjectival lemma '%s' ending in -i must be animate"): format(base.lemma)) end base.lemma = sub_ov(stem) break end stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$") if stem then if base.animacy == "wos" then error(("Masculine plural-only possessive adjectival lemma '%s' ending in -y must be inanimate"): format(base.lemma)) end base.lemma = sub_ov(stem) break end if base.animacy == "wos" then error(("Animate masculine plural-only adjectival lemma '%s' should end in -í, -ovi or -ini"): format(base.lemma)) elseif base.soft then error(("Soft masculine plural-only adjectival lemma '%s' should end in -í"):format(base.lemma)) else error(("Inanimate masculine plural-only adjectival lemma '%s' should end in -é, -ovy or -iny"): format(base.lemma)) end elseif base.gender == "ž" then stem = rmatch(base.lemma, "^(.*)é$") -- hard adjective if stem then base.lemma = stem .. "ý" break end stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective if stem then break end stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$") -- possessive adjective if stem then base.lemma = sub_ov(stem) break end error(("Feminine plural-only adjectival lemma '%s' should end in -é, -í, -ovy or -iny"):format(base.lemma)) else stem = rmatch(base.lemma, "^(.*)á$") -- hard adjective if stem then base.lemma = stem .. 
"ý" break end stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective if stem then break end stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$") -- possessive adjective if stem then base.lemma = sub_ov(stem) break end error(("Neuter plural-only adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma)) end else if base.gender == "m" then stem = rmatch(base.lemma, "^(.*)[ýí]$") or rmatch(base.lemma, "^(.*)ův$") or rmatch(base.lemma, "^(.*)in$") if stem then break end error(("Masculine adjectival lemma '%s' should end in -ý, -í, -ův or -in"):format(base.lemma)) elseif base.gender == "ž" then stem = rmatch(base.lemma, "^(.*)á$") if stem then base.lemma = stem .. "ý" break end stem = rmatch(base.lemma, "^(.*)í$") if stem then break end stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$") if stem then base.lemma = sub_ov(stem) break end error(("Feminine adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma)) else stem = rmatch(base.lemma, "^(.*)í$") if stem then break end stem = rmatch(base.lemma, "^(.*ov)o$") or rmatch(base.lemma, "^(.*in)o$") if stem then base.lemma = sub_ov(stem) break end error(("Neuter adjectival lemma '%s' should end in -é, -í, -ovo or -ino"):format(base.lemma)) end end end base.decl = "adj" end -- Now set the stem sets if not given. -- Now set the stem sets if not given. if not base.stem_sets then base.stem_sets = {{reducible = false}} end for _, stems in ipairs(base.stem_sets) do -- Set the stems. stems.vowel_stem = stem stems.nonvowel_stem = stem end end -- Determine the declension based on the lemma, gender and number. The declension is set in base.decl. In the process, -- we set either base.vowel_stem (if the lemma ends in a vowel) or base.nonvowel_stem (if the lemma does not end in a -- vowel), which is used by determine_stems(). 
-- In some cases (specifically with certain foreign nouns), we set
-- base.lemma to a new value; this is as if the user specified 'decllemma:'.
local function determine_declension(base)
	if base.indecl then
		base.decl = "indecl"
		base.nonvowel_stem = base.lemma
		return
	end
	-- Determine declension
	-- `stem` must be local; it used to be assigned without a declaration and so leaked into the global table.
	local stem = rmatch(base.lemma, "^(.*)a$")
	if stem then
		if base.gender == "m" then
			if base.animacy ~= "wos" then
				error("Masculine lemma in -a must be animate")
			end
			base.decl = "a-m"
		elseif base.gender == "ž" then
			if base.hard then
				base.decl = "hard-f"
			elseif base.soft then
				base.decl = "soft-f"
			elseif base.adje then
				base.decl = "adje-f"
			elseif rfind(base.lemma, com.velar_c .. "a$") then
				base.decl = "velar-f"
			elseif rfind(base.lemma, "[czs]" .. "a$") then
				base.decl = "czs-f"
			elseif rfind(base.lemma, com.inherently_soft_c .. "a$") then
				base.decl = "soft-f"
			else
				base.decl = "hard-f"
			end
		elseif base.gender == "n" then
			if rfind(stem, "m$") then
				base.decl = "ma-n"
			else
				error("Lemma ending in -a and neuter must end in -ma")
			end
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)e$")
	if stem then
		if base.tstem then
			base.decl = "tstem-n"
		elseif base.adje then
			base.decl = "adje-n"
		else
			base.decl = "soft-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)o$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[maestro]] m.
			base.decl = "o-m"
		elseif base.gender == "ž" then
			-- [[zoo]]; [[Žemaitsko]]?
			error("Feminine nouns in -o are indeclinable; use '.indecl' if needed")
		elseif base.hard then
			base.decl = "hard-n"
		elseif base.tstem then
			base.decl = "tstem-n"
		elseif base.nstem then
			base.decl = "nstem-n"
		elseif rfind(base.lemma, "[czs]" .. "o$") then
			base.decl = "czs-n"
		elseif rfind(base.lemma, com.inherently_soft_c .. "o$") then
			base.decl = "soft-n"
		elseif rfind(base.lemma, com.velar_c .. "o$") then
			base.decl = "velar-n"
		else
			base.decl = "hard-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)[iy]$")
	if stem then
		-- NOTE(review): base.decl is only set here for masculine '.adje' nouns; for any other lemma in -i/-y
		-- this function returns without assigning base.decl. Confirm that this is intended.
		if base.gender == "m" then
			if base.adje then
				base.decl = "adje-m"
			end
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" then
			if base.hard then
				base.decl = "hard-m"
			elseif base.soft then
				base.decl = "soft-m"
			elseif rfind(base.lemma, com.velar_c .. "$") then
				base.decl = "velar-m"
			elseif rfind(base.lemma, "[czs]" .. "$") then
				base.decl = "czs-m"
			elseif rfind(base.lemma, com.inherently_soft_c .. "$") then
				base.decl = "soft-m"
			else
				base.decl = "hard-m"
			end
		elseif base.gender == "ž" then
			if base.vstem then
				base.decl = "v-f"
				stem = rmatch(base.lemma, "^(.*)ej$")
			elseif base.soft then
				base.decl = "soft-f"
			elseif rfind(base.lemma, "[czs]" .. "$") then
				base.decl = "czs-f"
			else
				base.decl = "soft-f"
			end
		elseif base.gender == "n" then
			if base.foreign then
				stem = rmatch(base.lemma, "^(.*)um$") or rmatch(base.lemma, "^(.*)on$")
				if not stem then
					error("Unrecognized neuter foreign ending, should be -um or -on")
				end
				if base.hard then
					base.decl = "hard-n"
				elseif rfind(stem, "[eiuy]$") then
					base.decl = "semisoft-n"
				else
					base.decl = "hard-n"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem .. "o"
				base.vowel_stem = stem
				return
			else
				error("Neuter nouns ending in a consonant should use '.foreign' or '.decllemma:...'")
			end
		end
		base.nonvowel_stem = stem
		return
	end
	error("Unrecognized ending for lemma: '" .. base.lemma .. "'")
end


-- Determine the default value for the 'reducible' flag.
local function determine_default_reducible(base)
	-- Nouns in vowels other than -a/o as well as masculine nouns ending in all vowels don't have null endings so not
	-- reducible. Note, we are never called on adjectival nouns.
	if rfind(base.lemma, "[iyuíeě]$") or base.gender == "m" and rfind(base.lemma, "[ao]$") or base.tstem then
		base.default_reducible = false
		return
	end
	local stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" and rfind(stem, "e[ck]$") and not com.is_monosyllabic(stem) then
			base.default_reducible = true
		elseif base.gender == "ž" and rfind(stem, "eń$") then
			-- pěseń
			base.default_reducible = true
		else
			base.default_reducible = false
		end
		return
	end
	base.default_reducible = false
end


-- Determine the stems to use for each stem set: vowel and nonvowel stems, for singular
-- and plural. We assume that one of base.vowel_stem or base.nonvowel_stem has been
-- set in determine_declension(), depending on whether the lemma ends in
-- a vowel. We construct all the rest given the reducibility, vowel alternation spec and
-- any explicit stems given. We store the determined stems inside of the stem-set objects
-- in `base.stem_sets`, meaning that if the user gave multiple reducible or vowel-alternation
-- patterns, we will compute multiple sets of stems. The reason is that the stems may vary
-- depending on the reducibility and vowel alternation.
local function determine_stems(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end
	-- Set default reducible. (A former "default mixed reducible" expansion step was guarded by a local that was
	-- always false, so it never ran and has been dropped.)
	for _, stems in ipairs(base.stem_sets) do
		if stems.reducible == nil then
			stems.reducible = base.default_reducible
		end
	end

	-- Now determine all the stems for each stem set.
	for _, stems in ipairs(base.stem_sets) do
		if base.vowel_stem then
			stems.vowel_stem = base.vowel_stem
			stems.nonvowel_stem = stems.vowel_stem
			-- Apply vowel alternation first in cases like jádro -> jader; apply_vowel_alternation() will throw an error
			-- if the vowel being modified isn't the last vowel in the stem.
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.nonvowel_stem = dereduce(base, stems.nonvowel_stem)
				stems.oblique_nonvowel_stem = dereduce(base, stems.oblique_nonvowel_stem)
			end
		else
			stems.nonvowel_stem = base.nonvowel_stem
			-- The user specified #. E.g. nóc nocy
			if stems.oblique_slots then
				stems.oblique_slots = "all"
			end
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.vowel_stem = com.reduce(base.nonvowel_stem)
				if not stems.vowel_stem then
					error("Unable to reduce stem '" .. base.nonvowel_stem .. "'")
				end
			else
				stems.vowel_stem = base.nonvowel_stem
			end
		end
		stems.oblique_vowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.vowel_stem)
	end
end


-- Dispatch on the kind of word (pronoun, determiner, numeral, adjectival, manual or regular noun) and compute
-- its declension type and stems accordingly.
local function detect_indicator_spec(base)
	if base.pron then
		determine_pronoun_stems(base)
	elseif base.det then
		determine_determiner_stems(base)
	elseif base.num then
		determine_numeral_stems(base)
	elseif base.adj then
		process_declnumber(base)
		synthesize_adj_lemma(base)
	elseif base.manual then
		if base.stem_sets then
			-- FIXME, maybe this should be allowed?
			error("Reducible and vowel alternation specs cannot be given with manual declensions")
		end
		base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
		base.decl = "manual"
	else
		if base.number == "mn" then
			synthesize_singular_lemma(base)
		end
		determine_declension(base)
		determine_default_reducible(base)
		determine_stems(base)
	end
end


-- Run detect_indicator_spec() on every word spec, collect the genders seen for singular-capable and
-- plural-capable words, and reject specs that combine more than one of pronoun, determiner and numeral.
local function detect_all_indicator_specs(alternant_multiword_spec)
	alternant_multiword_spec.sg_genders = {}
	alternant_multiword_spec.pl_genders = {}
	iut.map_word_specs(alternant_multiword_spec, function(base)
		detect_indicator_spec(base)
		if base.number ~= "mn" then
			alternant_multiword_spec.sg_genders[base.actual_gender] = true
		end
		if base.number ~= "jed" then
			-- All t-stem masculines are neuter in the plural.
			-- NOTE(review): no such adjustment is made here; the plural gender is simply the actual gender.
			local plgender = base.actual_gender
			alternant_multiword_spec.pl_genders[plgender] = true
		end
	end)
	if (alternant_multiword_spec.saw_pron and 1 or 0) + (alternant_multiword_spec.saw_det and 1 or 0) +
		(alternant_multiword_spec.saw_num and 1 or 0) > 1 then
		error("Can't combine pronouns, determiners and/or numerals")
	end
end


-- Forward declaration; propagate_alternant_properties() and propagate_multiword_properties() call each other.
local propagate_multiword_properties

-- Propagate `property` within each alternant of `alternant_spec` and set the alternant spec's own value to the
-- common value of its alternants, or to `mixed_value` if they disagree.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local seen_property
	for _, multiword_spec in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(multiword_spec, property, mixed_value, nouns_only)
		if seen_property == nil then
			seen_property = multiword_spec[property]
		elseif multiword_spec[property] and seen_property ~= multiword_spec[property] then
			seen_property = mixed_value
		end
	end
	alternant_spec[property] = seen_property
end


-- Propagate `property` sideways within a multiword spec: each "nounal" word passes its value to the words to its
-- left that lack one (back to the previous nounal word), and the last nounal word also passes its value to the
-- words to its right. With `nouns_only` truthy, a plain word counts as nounal if is_regular_noun() says so;
-- otherwise it counts if it already has a value for `property`.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local seen_property = nil
	local last_seen_nounal_pos = 0
	local word_specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	for i = 1, #word_specs do
		local is_nounal
		if word_specs[i].alternants then
			propagate_alternant_properties(word_specs[i], property, mixed_value)
			is_nounal = not not word_specs[i][property]
		elseif nouns_only then
			is_nounal = is_regular_noun(word_specs[i])
		else
			is_nounal = not not word_specs[i][property]
		end
		if is_nounal then
			if not word_specs[i][property] then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			for j = last_seen_nounal_pos + 1, i - 1 do
				word_specs[j][property] = word_specs[j][property] or word_specs[i][property]
			end
			last_seen_nounal_pos = i
			if seen_property == nil then
				seen_property = word_specs[i][property]
			elseif seen_property ~= word_specs[i][property] then
				seen_property = mixed_value
			end
		end
	end
	if last_seen_nounal_pos > 0 then
		for i = last_seen_nounal_pos + 1, #word_specs do
			word_specs[i][property] = word_specs[i][property] or word_specs[last_seen_nounal_pos][property]
		end
	end
	multiword_spec[property] = seen_property
end


-- Push `property` down from the top-level spec to every word spec that still lacks it, starting from
-- `default_propval`, and initialize the matching "actual_" .. property field wherever it is unset.
-- Throws an error if a word would end up with the value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	local function set_and_fetch(obj, default)
		local retval
		if obj[property] then
			retval = obj[property]
		else
			obj[property] = default
			retval = default
		end
		if not obj["actual_" .. property] then
			obj["actual_" .. property] = retval
		end
		return retval
	end
	local propval1 = set_and_fetch(alternant_multiword_spec, default_propval)
	for _, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		local propval2 = set_and_fetch(alternant_or_word_spec, propval1)
		if alternant_or_word_spec.alternants then
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				local propval3 = set_and_fetch(multiword_spec, propval2)
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					local propval4 = set_and_fetch(word_spec, propval3)
					if propval4 == "mixed" then
						-- FIXME, use clearer error message.
						error("Attempt to assign mixed " .. property .. " to word")
					end
					set_and_fetch(word_spec, propval4)
				end
			end
		else
			if propval2 == "mixed" then
				-- FIXME, use clearer error message.
				error("Attempt to assign mixed " .. property .. " to word")
			end
			set_and_fetch(alternant_or_word_spec, propval2)
		end
	end
end


--[=[
Propagate `property` (one of "animacy", "gender" or "number") from nouns to adjacent adjectives. We proceed as follows:
1. We assume the properties in question are already set on all nouns. This should happen in
   set_defaults_and_check_bad_indicators().
2. We first propagate properties upwards and sideways. We recurse downwards from the top. When we encounter a multiword
   spec, we proceed left to right looking for a noun. When we find a noun, we fetch its property (recursing if the noun
   is an alternant), and propagate it to any adjectives to its left, up to the next noun to the left. When we have
   processed the last noun, we also propagate its property value to any adjectives to the right (to handle e.g.
   [[anděl strážný]] "guardian angel", where the adjective [[strážný]] should inherit the 'masculine' and 'animate'
   properties of [[anděl]]). Finally, we set the property value for the multiword spec itself by combining all the
   non-nil properties of the individual elements. If all non-nil properties have the same value, the result is that
   value, otherwise it is `mixed_value` (which is "mixed" for animacy and gender, but "allthree" for number).
3. When we encounter an alternant spec in this process, we recursively process each alternant (which is a multiword
   spec) using the previous step, and combine any non-nil properties we encounter the same way as for multiword specs.
4. The effect of steps 2 and 3 is to set the property of each alternant and multiword spec based on its children or its
   neighbors.
]=]
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, "nouns only")
	propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, false)
	propagate_properties_downward(alternant_multiword_spec, property, default_propval)
end


-- Record in `first_noun` the index of a regular noun: on each alternant's multiword spec the index of its first
-- regular noun, and on the top-level spec the index of the word or alternant containing one.
-- NOTE(review): the search stops at the first plain regular noun, but not at an alternant that contains a noun,
-- so a later noun can overwrite the top-level `first_noun` in that case. Confirm that this is intended.
local function determine_noun_status(alternant_multiword_spec)
	for i, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if alternant_or_word_spec.alternants then
			local is_noun = false
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				for j, word_spec in ipairs(multiword_spec.word_specs) do
					if is_regular_noun(word_spec) then
						multiword_spec.first_noun = j
						is_noun = true
						break
					end
				end
			end
			if is_noun then
				alternant_multiword_spec.first_noun = i
			end
		elseif is_regular_noun(alternant_or_word_spec) then
			alternant_multiword_spec.first_noun = i
			return
		end
	end
end


-- Set the part of speech based on properties of the individual words.
local function set_pos(alternant_multiword_spec)
	if alternant_multiword_spec.args.pos then
		alternant_multiword_spec.pos = alternant_multiword_spec.args.pos
	elseif alternant_multiword_spec.saw_pron and not alternant_multiword_spec.saw_non_pron then
		alternant_multiword_spec.pos = "pronoun"
	elseif alternant_multiword_spec.saw_det and not alternant_multiword_spec.saw_non_det then
		alternant_multiword_spec.pos = "determiner"
	elseif alternant_multiword_spec.saw_num and not alternant_multiword_spec.saw_non_num then
		alternant_multiword_spec.pos = "numeral"
	else
		alternant_multiword_spec.pos = "noun"
	end
	alternant_multiword_spec.plpos = require("Modul:string utilities").pluralize(alternant_multiword_spec.pos)
end


-- Fill in blank lemmas with `pagename` and record the original, link-free and lowercased variants of each lemma
-- on its word spec. `base.lemma` ends up as the user's 'decllemma' if given, otherwise the normalized lemma.
local function normalize_all_lemmas(alternant_multiword_spec, pagename)
	iut.map_word_specs(alternant_multiword_spec, function(base)
		if base.lemma == "" then
			base.lemma = pagename
		end
		base.orig_lemma = base.lemma
		base.orig_lemma_no_links = m_links.remove_links(base.lemma)
		local lemma = base.orig_lemma_no_links
		-- If the lemma is all-uppercase, lowercase it but note this, so that later in combine_stem_ending() we convert it
		-- back to uppercase. This allows us to handle all-uppercase acronyms without a lot of extra complexity.
		-- FIXME: This may not make sense at all.
		if uupper(lemma) == lemma then
			base.all_uppercase = true
			lemma = ulower(lemma)
		end
		base.actual_lemma = lemma
		base.lemma = base.decllemma or lemma
	end)
end


-- Decline a single word spec: run the declension function for base.decl once per stem set, apply derived slots
-- and overrides, fill in the dual slots that are copied from other dual slots, and, when the number the word is
-- declined as (base.number) differs from its actual number, copy the forms across numbers and erase the numbers
-- that don't apply.
local function decline_noun(base)
	for _, stems in ipairs(base.stem_sets) do
		if not decls[base.decl] then
			-- tostring() so that an unset (nil) base.decl produces this message instead of a concatenation error.
			error("Internal error: Unrecognized declension type '" .. tostring(base.decl) .. "'")
		end
		decls[base.decl](base, stems)
	end
	handle_derived_slots_and_overrides(base)

	local function copy(from_slot, to_slot)
		base.forms[to_slot] = base.forms[from_slot]
	end

	if base.gender ~= "m" then
		copy("nom_d", "acc_d")
	end
	copy("nom_d", "voc_d")
	copy("dat_d", "loc_d")
	copy("dat_d", "ins_d")

	if base.actual_number ~= base.number then
		local source_num = base.number == "jed" and "_s" or base.number == "dw" and "_d" or "_p"
		local dest_nums = base.number == "jed" and {"_p", "_d"} or base.number == "dw" and {"_s", "_p"} or
			{"_s", "_d"}
		-- The destination/erase suffixes are lists, so iterate over them; concatenating the list itself onto a
		-- slot name (as was done before) raises "attempt to concatenate a table value".
		for _, dest_num in ipairs(dest_nums) do
			for case, _ in pairs(cases) do
				copy(case .. source_num, case .. dest_num)
			end
			copy("nom" .. source_num .. "_linked", "nom" .. dest_num .. "_linked")
		end
		if base.actual_number ~= "allthree" then
			local erase_nums = base.actual_number == "jed" and {"_d", "_p"} or
				base.actual_number == "dw" and {"_s", "_p"} or {"_s", "_d"}
			for _, erase_num in ipairs(erase_nums) do
				for case, _ in pairs(cases) do
					base.forms[case .. erase_num] = nil
				end
				base.forms["nom" .. erase_num .. "_linked"] = nil
			end
		end
	end
end


-- Variant detection is currently disabled; always returns nil.
local function get_variants(form)
	return nil
	--[=[
	FIXME
	return
		form:find(com.VAR1) and "var1" or
		form:find(com.VAR2) and "var2" or
		form:find(com.VAR3) and "var3" or
		nil
	]=]
end


-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
local function compute_categories_and_annotation(alternant_multiword_spec)
	local all_cats = {}
	local function insert(cattype)
		m_table.insertIfNot(all_cats, "Upper Sorbian " .. cattype)
	end
	if alternant_multiword_spec.pos == "noun" then
		if alternant_multiword_spec.actual_number == "jed" then
			insert("uncountable nouns")
		elseif alternant_multiword_spec.actual_number == "dw" then
			insert("dualia tantum")
		elseif alternant_multiword_spec.actual_number == "mn" then
			insert("pluralia tantum")
		end
	end

	local annparts = {}
	local decldescs = {}
	local vowelalts = {}
	local foreign = {}
	local irregs = {}
	local stemspecs = {}
	local reducible = nil

	-- Return an English description such as "masculine" for the given gender and animacy codes; unknown codes
	-- contribute nothing, so the result may be an empty string.
	local function get_genanim(gender, animacy)
		local gender_code_to_desc = {
			m = "masculine",
			f = "feminine",
			-- This module's feminine gender code is "ž" (see the check below); without this entry feminine
			-- nouns got an empty gender description. "f" is kept for backward compatibility.
			["ž"] = "feminine",
			n = "neuter",
			none = nil,
		}
		-- NOTE(review): elsewhere this module uses the animacy codes "wos" and "wěc", neither of which is a key
		-- of this table, so no animacy description is currently produced for them. Confirm the intended mapping.
		local animacy_code_to_desc = {
			pr = "personal",
			anml = "animal",
			inan = "inanimate",
			none = nil,
		}
		local descs = {}
		table.insert(descs, gender_code_to_desc[gender])
		if gender ~= "ž" and gender ~= "n" then
			-- masculine or "none" (e.g. certain pronouns and numerals)
			table.insert(descs, animacy_code_to_desc[animacy])
		end
		return table.concat(descs, " ")
	end

	-- Collapse runs of spaces and strip leading/trailing whitespace.
	local function trim(text)
		text = text:gsub(" +", " ")
		return mw.text.trim(text)
	end

	-- Add the categories and annotation parts contributed by a single word spec.
	local function do_word_spec(base)
		local actual_genanim = get_genanim(base.actual_gender, base.actual_animacy)
		local genanim = actual_genanim
		if base.actual_gender == "m" then
			insert(actual_genanim .. " " .. alternant_multiword_spec.plpos)
		end
		for _, stems in ipairs(base.stem_sets) do
			local props = declprops[base.decl]
			local cats = props.cat
			if type(cats) == "function" then
				cats = cats(base, stems)
			end
			if type(cats) == "string" then
				cats = {cats}
			end
			local default_desc
			for i, cat in ipairs(cats) do
				cat = cat:gsub("GENPOS", "GENDER POS")
				if not cat:find("POS") then
					cat = cat .. " POS"
				end
				if i == #cats then
					default_desc = cat:gsub(" POS", "")
				end
				cat = cat:gsub("GENDER", actual_genanim)
				cat = cat:gsub("POS", alternant_multiword_spec.plpos)
				-- Need to trim `cat` because actual_genanim may be an empty string.
				insert(trim(cat))
			end

			local desc = props.desc
			if type(desc) == "function" then
				desc = desc(base, stems)
			end
			desc = desc or default_desc
			desc = desc:gsub("GENDER", genanim)
			-- Need to trim `desc` because genanim may be an empty string.
			m_table.insertIfNot(decldescs, trim(desc))

			local vowelalt
			if stems.vowelalt == "quant" then
				vowelalt = "quant-alt"
				insert("nouns with quantitative vowel alternation")
			elseif stems.vowelalt == "quant-ě" then
				vowelalt = "í-ě-alt"
				insert("nouns with í-ě alternation")
			end
			if vowelalt then
				m_table.insertIfNot(vowelalts, vowelalt)
			end
			if reducible == nil then
				reducible = stems.reducible
			end
			if stems.reducible then
				insert("nouns with reducible stem")
			end
			if base.foreign then
				m_table.insertIfNot(foreign, "foreign")
				if not base.decllemma then
					-- NOTE: there are nouns that use both 'foreign' and 'decllemma', e.g. [[Zeus]].
					insert("nouns with regular foreign declension")
				end
			end
			-- User-specified 'decllemma:' indicates irregular stem. Don't consider foreign nouns in -us/-os/-es, -um/-on or
			-- silent -e (e.g. [[software]]) where this ending is simply dropped in oblique and plural forms as irregular;
			-- there are too many of these and they are already categorized above as 'nouns with regular foreign declension'.
			if base.decllemma then
				m_table.insertIfNot(irregs, "irreg-stem")
				insert("nouns with irregular stem")
			end
			m_table.insertIfNot(stemspecs, stems.vowel_stem)
		end
	end

	-- Only the "key" word (the one recorded in first_noun, else the first word) contributes.
	local key_entry = alternant_multiword_spec.first_noun or 1
	if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
		local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
		if alternant_or_word_spec.alternants then
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				key_entry = multiword_spec.first_noun or 1
				if #multiword_spec.word_specs >= key_entry then
					do_word_spec(multiword_spec.word_specs[key_entry])
				end
			end
		else
			do_word_spec(alternant_or_word_spec)
		end
	end

	if alternant_multiword_spec.actual_number == "jed" or alternant_multiword_spec.actual_number == "mn" or
		alternant_multiword_spec.actual_number == "dw" then
		-- not "allthree" or "none" (for [[sebe]])
		table.insert(annparts,
			alternant_multiword_spec.actual_number == "jed" and '<abbr title="jenož jednota">jed</abbr>' or
			alternant_multiword_spec.actual_number == "dw" and '<abbr title="jenož dwojota">dw</abbr>' or
			'<abbr title="jenož mnohota">mn</abbr>')
	end
	if #decldescs == 0 then
		table.insert(annparts, "indecl")
	else
		table.insert(annparts, table.concat(decldescs, " // "))
	end
	if #vowelalts > 0 then
		table.insert(annparts, table.concat(vowelalts, "/"))
	end
	if reducible == "mixed" then
		table.insert(annparts, "mixed-reducible")
	elseif reducible then
		table.insert(annparts, "reducible")
	end
	if #foreign > 0 then
		table.insert(annparts, table.concat(foreign, " // "))
	end
	if #irregs > 0 then
		table.insert(annparts, table.concat(irregs, " // "))
	end
	alternant_multiword_spec.annotation = table.concat(annparts, " ")
	if #stemspecs > 1 then
		insert("nouns with multiple stems")
	end
	if alternant_multiword_spec.actual_number == "allthree" and
		not m_table.deepEquals(alternant_multiword_spec.sg_genders, alternant_multiword_spec.pl_genders) then
		insert("nouns that change gender in the plural")
	end
	alternant_multiword_spec.categories = all_cats
end


-- Convert the generated forms in `alternant_multiword_spec.forms` for display via iut.show_forms(). The lemmas
-- passed to it are the forms of the first slot in `potential_lemma_slots` that has any forms.
local function show_forms(alternant_multiword_spec)
	local lemmas = {}
	for _, slot in ipairs(potential_lemma_slots) do
		if alternant_multiword_spec.forms[slot] then
			for _, formobj in ipairs(alternant_multiword_spec.forms[slot]) do
				-- FIXME, now can support footnotes as qualifiers in headwords?
				table.insert(lemmas, formobj.form)
			end
			break
		end
	end
	local props = {
		lemmas = lemmas,
		slot_table = alternant_multiword_spec.output_noun_slots,
		lang = lang,
		canonicalize = function(form)
			-- return com.remove_variant_codes(form)
			return form
		end,
	}
	iut.show_forms(alternant_multiword_spec.forms, props)
end


-- Build the wikitext of the declension table. The template strings below are wikitable markup, in which the row
-- separator `|-` and the cell markers `!` / `|` must each start a line, so the line breaks inside them matter.
local function make_table(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms

	local function template_prelude(min_width)
		return rsub([=[
<div>
<div class="NavFrame" style="max-width: MINWIDTHem">
<div class="NavHead" style="background:#eff7ff">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:100%;display:table" class="inflection-table"
|-
]=], "MINWIDTH", min_width)
	end

	local function template_postlude()
		return [=[
|{\cl}{notes_clause}</div></div></div>]=]
	end

	local table_spec_allthree = template_prelude("45") .. [=[
! style="width:33%;background:#d9ebff" | 
! style="background:#d9ebff" | jednota
! style="background:#d9ebff" | dwojota
! style="background:#d9ebff" | mnohota
|-
!style="background:#eff7ff"|mjenowak
| {nom_s}
| {nom_d}
| {nom_p}
|-
!style="background:#eff7ff"|rodźak
| {gen_s}
| {gen_d}
| {gen_p}
|-
!style="background:#eff7ff"|dawak
| {dat_s}
| {dat_d}
| {dat_p}
|-
!style="background:#eff7ff"|žadak
| {acc_s}
| {acc_d}
| {acc_p}
|-
!style="background:#eff7ff"|posrědnik
| {ins_s}
| {ins_d}
| {ins_p}
|-
!style="background:#eff7ff"|měsćak
| {loc_s}
| {loc_d}
| {loc_p}
|-
!style="background:#eff7ff"|wołak
| {voc_s}
| {voc_d}
| {voc_p}
]=] .. template_postlude()

	local function get_table_spec_one_number(number, numcode)
		local table_spec_one_number = [=[
! style="width:33%;background:#d9ebff" | 
! style="background:#d9ebff" | NUMBER
|-
!style="background:#eff7ff"|mjenowak
| {nom_CODE}
|-
!style="background:#eff7ff"|rodźak
| {gen_CODE}
|-
!style="background:#eff7ff"|dawak
| {dat_CODE}
|-
!style="background:#eff7ff"|žadak
| {acc_CODE}
|-
!style="background:#eff7ff"|posrědnik
| {ins_CODE}
|-
!style="background:#eff7ff"|měsćak
| {loc_CODE}
|-
!style="background:#eff7ff"|wołak
| {voc_CODE}
]=]
		return template_prelude("30") .. table_spec_one_number:gsub("NUMBER", number):gsub("CODE", numcode) ..
			template_postlude()
	end

	local function get_table_spec_one_number_clitic(number, numcode)
		local table_spec_one_number_clitic = [=[
! rowspan=2 style="width:33%;background:#d9ebff"| 
! colspan=2 style="background:#d9ebff" | NUMBER
|-
! style="width:33%;background:#d9ebff" | stressed
! style="background:#d9ebff" | clitic
|-
!style="background:#eff7ff"|mjenowak
| colspan=2 | {nom_CODE}
|-
!style="background:#eff7ff"|rodźak
| {gen_CODE}
| {clitic_gen_CODE}
|-
!style="background:#eff7ff"|dawak
| {dat_CODE}
| {clitic_dat_CODE}
|-
!style="background:#eff7ff"|žadak
| {acc_CODE}
| {clitic_acc_CODE}
|-
!style="background:#eff7ff"|posrědnik
| colspan=2 | {ins_CODE}
|-
!style="background:#eff7ff"|měsćak
| colspan=2 | {loc_CODE}
|-
!style="background:#eff7ff"|wołak
| colspan=2 | {voc_CODE}
]=]
		return template_prelude("40") ..
			table_spec_one_number_clitic:gsub("NUMBER", number):gsub("CODE", numcode) ..
			template_postlude()
	end

	local notes_template = [=[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]

	if alternant_multiword_spec.title then
		forms.title = alternant_multiword_spec.title
	else
		forms.title = 'Skłonjowanje <i lang="hsb">' .. forms.lemma .. '</i>'
	end

	local annotation = alternant_multiword_spec.annotation
	if annotation == "" then
		forms.annotation = ""
	else
		forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
	end

	local number, numcode
	if alternant_multiword_spec.actual_number == "jed" then
		number, numcode = "jednota", "s"
	elseif alternant_multiword_spec.actual_number == "dw" then
		number, numcode = "dwojota", "d"
	elseif alternant_multiword_spec.actual_number == "mn" then
		number, numcode = "mnohota", "p"
	elseif alternant_multiword_spec.actual_number == "none" then -- used for [[sebe]]
		number, numcode = "", "s"
	end

	local table_spec =
		alternant_multiword_spec.actual_number == "allthree" and table_spec_allthree or
		alternant_multiword_spec.has_clitic and get_table_spec_one_number_clitic(number, numcode) or
		get_table_spec_one_number(number, numcode)
	forms.notes_clause = forms.footnote ~= "" and
		m_string_utilities.format(notes_template, forms) or ""
	return m_string_utilities.format(table_spec, forms)
end


-- Return the list of distinct headword gender specs, one per word: gender, "-", animacy (with "wěc" rewritten
-- to "in"), plus a "-p" or "-d" suffix when the term is plural-only or dual-only.
local function compute_headword_genders(alternant_multiword_spec)
	local genders = {}
	local number
	if alternant_multiword_spec.actual_number == "mn" then
		number = "-p"
	elseif alternant_multiword_spec.actual_number == "dw" then
		number = "-d"
	else
		number = ""
	end
	iut.map_word_specs(alternant_multiword_spec, function(base)
		local animacy = base.animacy
		if animacy == "wěc" then
			animacy = "in"
		end
		m_table.insertIfNot(genders, base.gender .. "-" .. animacy .. number)
	end)
	return genders
end


-- Externally callable function to parse and decline a noun given user-specified arguments.
-- Return value is ALTERNANT_MULTIWORD_SPEC, an object where the declined forms are in
-- `ALTERNANT_MULTIWORD_SPEC.forms` for each slot. If there are no values for a slot, the
-- slot key will be missing. The value for a given slot is a list of objects
-- {form=FORM, footnotes=FOOTNOTES}.
-- Parse the noun spec given in `parent_args[1]`, decline it, and return the resulting spec object (declined
-- forms live in its `forms` field, one entry per slot). When the `json` argument is set, the JSON
-- serialization of that object is returned instead. A true `from_headword` additionally accepts the
-- headword-only parameters `head`, `lemma`, `g`, `ž`, `m`, `adj`, `dim` and `id`.
function export.do_generate_forms(parent_args, from_headword)
	local params = {
		[1] = {required = true, default = "žona<f>"},
		title = {},
		pagename = {},
		json = {type = "boolean"},
		pos = {},
	}
	if from_headword then
		-- These list-valued parameters are only accepted when `from_headword` is set.
		for _, list_param in ipairs({"head", "lemma", "g", "ž", "m", "adj", "dim"}) do
			params[list_param] = {list = true}
		end
		params.id = {}
	end

	local args = m_para.process(parent_args, params)

	local spec = iut.parse_inflected_text(args[1], {
		parse_indicator_spec = parse_indicator_spec,
		angle_brackets_omittable = true,
		allow_blank_lemma = true,
	})
	spec.title = args.title
	spec.args = args

	-- Page name precedence: explicit |pagename=, then (headword calls only) the first |head=, then the
	-- subpage text of the current title.
	local pagename = args.pagename
	if not pagename and from_headword then
		pagename = args.head[1]
	end
	if not pagename then
		pagename = mw.title.getCurrentTitle().subpageText
	end

	normalize_all_lemmas(spec, pagename)
	set_all_defaults_and_check_bad_indicators(spec)

	-- Animacy, number and gender have to be propagated before detect_all_indicator_specs() runs, so that
	-- adjectives already carry the gender and number needed to synthesize the adjective lemma correctly.
	propagate_properties(spec, "animacy", "wěc", "mixed")
	propagate_properties(spec, "number", "allthree", "allthree")
	-- FIXME: the default (third argument) used to be 'm', with a note that it only applied to plural
	-- adjectives, where it made no difference; here, however, plural adjectives are distinguished for
	-- gender and animacy. Make sure 'mixed' works.
	propagate_properties(spec, "gender", "mixed", "mixed")
	detect_all_indicator_specs(spec)

	-- 'actual_number' is set for adjectives by detect_all_indicator_specs(), so it can only be propagated
	-- afterwards.
	propagate_properties(spec, "actual_number", "allthree", "allthree")
	determine_noun_status(spec)
	set_pos(spec)
	spec.output_noun_slots = get_output_noun_slots(spec)

	iut.inflect_multiword_or_alternant_multiword_spec(spec, {
		-- Reads `actual_number` at call time and delegates to the file-level skip_slot().
		skip_slot = function(slot)
			return skip_slot(spec.actual_number, slot)
		end,
		slot_table = spec.output_noun_slots,
		get_variants = get_variants,
		inflect_word_spec = decline_noun,
	})

	compute_categories_and_annotation(spec)
	spec.genders = compute_headword_genders(spec)

	if not args.json then
		return spec
	end
	-- NOTE(review): the processed arguments are removed before serializing, presumably to keep them out
	-- of the JSON output -- confirm.
	spec.args = nil
	return require("Modul:JSON").toJSON(spec)
end

-- Entry point for {{hsb-ndecl}}. Template-callable: declines the noun described by the arguments of the
-- calling template and returns the declension table followed by the formatted categories. If
-- do_generate_forms() hands back a string (the JSON case), that string is returned unchanged.
function export.show(frame)
	local spec = export.do_generate_forms(frame:getParent().args)
	if type(spec) == "string" then
		-- JSON return value
		return spec
	end
	show_forms(spec)
	local declension_table = make_table(spec)
	local category_text = require("Modul:utilities").format_categories(spec.categories, lang, nil, nil, force_cat)
	return declension_table .. category_text
end

return export
 crzlxyjm43mpxo7objvvf4zsc9ykve9 Předłoha:Tooltip/styles.css 10 6338 17585 2024-11-22T09:17:41Z Sławobóg 2519 styles 17585 sanitized-css text/css /* {{pp-template}} */ .tooltip-dotted { border-bottom: 1px dotted; cursor: help; } a3nuuy8e07xkoz1hiw7pmpuv4gixony Předłoha:tooltip 10 6339 17586 2024-11-22T09:20:08Z Sławobóg 2519 předłoha 17586 wikitext text/x-wiki <TemplateStyles src="Předłoha:Tooltip/styles.css" /><span class="rt-commentedText" style="cursor: help; border-bottom: 1px dotted;" title="{{#invoke:string|replace|source={{{2}}}|pattern="|replace=&quot;}}" tabindex="0">{{{1}}}</span><noinclude>{{dokumentacija}}</noinclude> qq79k2ylkayl1kphwcj46errmal2rtg