Module:Text: திருத்தங்களுக்கு இடையிலான வேறுபாடு

தமிழர்விக்கியிலிருந்து
Jump to navigation Jump to search
உள்ளடக்கம் நீக்கப்பட்டது உள்ளடக்கம் சேர்க்கப்பட்டது
2024-06-05
imported>Ravidreams
சி en:Module:Text இலிருந்து திருத்தம் இறக்குமதி செய்யப்பட்டன
 
(பயனரால் செய்யப்பட்ட ஒரு இடைப்பட்ட திருத்தம் காட்டப்படவில்லை.)
வரிசை 1: வரிசை 1:
local Text = { serial = "2024-06-05",
local yesNo = require("Module:Yesno")
suite = "Text",
local Text = { serial = "2022-07-21",
item = 29387871 }
suite = "Text" }
--[=[
--[=[
Text utilities
Text utilities
]=]
]=]
local Failsafe = Text
local GlobalMod = Text
local Patterns = { }
local RangesLatin = false
local SeekQuote = false



local foreignModule = function ( access, advanced, append, alt, alert )
-- Fetch a (possibly global) module, first under its local name and,
-- if that fails, under the page name Wikidata links to this wiki.
-- Precondition:
-- access -- string, with name of base module
-- advanced -- true, for require(); else mw.loadData()
-- append -- string, with subpage part, if any; or false
-- alt -- number, of wikidata item of root; or false
-- alert -- true, for throwing error on data problem
-- Postcondition:
-- Returns whatever the loader returned, probably table;
-- nil if nothing could be loaded and alert is not set
-- 2019-10-29
local storage = access
-- Appends the subpage part to whatever base name storage holds now
local finer = function ()
if append then
storage = string.format( "%s/%s",
storage,
append )
end
end
local fun, lucky, r, suited
if advanced then
fun = require
else
fun = mw.loadData
end
-- Cache per base name: a string is the sitelink found earlier,
-- true records that a sitelink lookup yielded nothing
GlobalMod.globalModules = GlobalMod.globalModules or { }
suited = GlobalMod.globalModules[ access ]
if not suited then
-- Nothing cached yet: try the local "Module:" page first
finer()
lucky, r = pcall( fun, "Module:" .. storage )
end
-- lucky is still nil here when a cache entry existed, so the
-- fallback below also runs for previously resolved names
if not lucky then
if not suited and
type( alt ) == "number" and
alt > 0 then
-- Resolve the Wikidata item to the local page title
suited = string.format( "Q%d", alt )
suited = mw.wikibase.getSitelink( suited )
GlobalMod.globalModules[ access ] = suited or true
end
if type( suited ) == "string" then
-- The sitelink is used as given, without a "Module:" prefix added
storage = suited
finer()
lucky, r = pcall( fun, storage )
end
if not lucky and alert then
-- Level 0: no position information is added to the message
error( "Missing or invalid page: " .. storage, 0 )
end
end
return r
end -- foreignModule()



local function factoryQuote()
    -- Lazily populate Text.quoteLang / Text.quoteType.
    -- Definitions are fetched once from the "quoting" subpage via
    -- foreignModule(); whatever is missing afterwards is replaced by
    -- an English default so that later lookups always succeed.
    if Text.quoteLang then
        return
    end
    local langs = Text.quoteLang
    local kinds = Text.quoteType
    local quoting = foreignModule( "Text",
                                   false,
                                   "quoting",
                                   Text.item )
    if type( quoting ) == "table" then
        langs = quoting.langs
        kinds = quoting.types
    end
    if type( langs ) ~= "table" then
        langs = { }
    end
    if type( kinds ) ~= "table" then
        kinds = { }
    end
    Text.quoteLang = langs
    Text.quoteType = kinds
    -- English must map to a quote type name ...
    if type( langs.en ) ~= "string" then
        langs.en = "ld"
    end
    -- ... and that type needs primary and secondary codepoint pairs
    if type( kinds[ langs.en ] ) ~= "table" then
        kinds[ langs.en ] = { { 8220, 8221 },
                              { 8216, 8217 } }
    end
end -- factoryQuote()




local function fiatQuote( apply, alien, advance )
local function fiatQuote( apply, alien, advance )
வரிசை 101: வரிசை 12:
-- alien -- string, with language code
-- alien -- string, with language code
-- advance -- number, with level 1 or 2
-- advance -- number, with level 1 or 2
local r = apply
local r = apply and tostring(apply) or ""
alien = alien or "en"
local quotes, suite
advance = tonumber(advance) or 0
factoryQuote()
if alien then
local suite
suite = mw.text.trim( alien )
local data = mw.loadData('Module:Text/data')
local QuoteLang = data.QuoteLang
if suite == "" then
suite = false
local QuoteType = data.QuoteType
local slang = alien:match( "^(%l+)-" )
else
suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"]
local s = Text.quoteLang[ suite ]
if s then
if suite then
suite = s
local quotes = QuoteType[ suite ]
if quotes then
local space
if quotes[ 3 ] then
space = " "
else
else
local slang = suite:match( "^(%l+)-" )
space = ""
if slang then
end
suite = Text.quoteLang[ slang ]
quotes = quotes[ advance ]
end
if quotes then
r = mw.ustring.format( "%s%s%s%s%s",
mw.ustring.char( quotes[ 1 ] ),
space,
apply,
space,
mw.ustring.char( quotes[ 2 ] ) )
end
end
end
end
if not suite then
suite = Text.quoteLang.en
end
quotes = Text.quoteType[ suite ]
if quotes then
local space
if quotes[ 3 ] then
space = " "
else
else
space = ""
mw.log( "fiatQuote() " .. suite )
end
end
quotes = quotes[ advance ]
if quotes then
r = mw.ustring.format( "%s%s%s%s%s",
mw.ustring.char( quotes[ 1 ] ),
space,
apply,
space,
mw.ustring.char( quotes[ 2 ] ) )
end
else
mw.log( "fiatQuote() " .. suite )
end
end
return r
return r
வரிசை 155: வரிசை 55:
-- accept -- true, if no error messages to be appended
-- accept -- true, if no error messages to be appended
-- Returns: string
-- Returns: string
local r
local r = ""
if type( apply ) == "table" then
apply = type(apply) == "table" and apply or {}
again = math.floor(tonumber(again) or 1)
local bad = { }
local codes = { }
if again < 1 then
local s
return ""
for k, v in pairs( apply ) do
s = type( v )
if s == "number" then
if v < 32 and v ~= 9 and v ~= 10 then
v = tostring( v )
else
v = math.floor( v )
s = false
end
elseif s ~= "string" then
v = tostring( v )
end
if s then
table.insert( bad, v )
else
table.insert( codes, v )
end
end -- for k, v
if #bad == 0 then
if #codes > 0 then
r = mw.ustring.char( unpack( codes ) )
if again then
if type( again ) == "number" then
local n = math.floor( again )
if n > 1 then
r = r:rep( n )
elseif n < 1 then
r = ""
end
else
s = "bad repetitions: " .. tostring( again )
end
end
end
else
s = "bad codepoints: " .. table.concat( bad, " " )
end
if s and not accept then
r = tostring( mw.html.create( "span" )
:addClass( "error" )
:wikitext( s ) )
end
end
end
return r or ""
local bad = { }
local codes = { }
for _, v in ipairs( apply ) do
local n = tonumber(v)
if not n or (n < 32 and n ~= 9 and n ~= 10) then
table.insert(bad, tostring(v))
else
table.insert(codes, math.floor(n))
end
end
if #bad > 0 then
if not accept then
r = tostring( mw.html.create( "span" )
:addClass( "error" )
:wikitext( "bad codepoints: " .. table.concat( bad, " " )) )
end
return r
end
if #codes > 0 then
r = mw.ustring.char( unpack( codes ) )
if again > 1 then
r = r:rep(again)
end
end
return r
end -- Text.char()
end -- Text.char()


local function trimAndFormat(args, fmt)

local result = {}
if type(args) ~= 'table' then
args = {args}
end
for _, v in ipairs(args) do
v = mw.text.trim(tostring(v))
if v ~= "" then
table.insert(result,fmt and mw.ustring.format(fmt, v) or v)
end
end
return result
end


Text.concatParams = function ( args, apply, adapt )
Text.concatParams = function ( args, apply, adapt )
வரிசை 216: வரிசை 110:
-- Returns: string
-- Returns: string
local collect = { }
local collect = { }
return table.concat(trimAndFormat(args,adapt), apply or "|")
for k, v in pairs( args ) do
if type( k ) == "number" then
v = mw.text.trim( v )
if v ~= "" then
if adapt then
v = mw.ustring.format( adapt, v )
end
table.insert( collect, v )
end
end
end -- for k, v
return table.concat( collect, apply or "|" )
end -- Text.concatParams()
end -- Text.concatParams()






Text.containsCJK = function ( analyse )
Text.containsCJK = function ( s )
-- Is any CJK code within?
-- Is any CJK code within?
-- Parameter:
-- Parameter:
-- analyse -- string
-- s -- string
-- Returns: true, if CJK detected
-- Returns: true, if CJK detected
s = s and tostring(s) or ""
local r
local patternCJK = mw.loadData('Module:Text/data').PatternCJK
if not Patterns.CJK then
Patterns.CJK = mw.ustring.char( 91,
return mw.ustring.find( s, patternCJK ) ~= nil
0x3400, 45, 0x9FFF,
0x20000, 45, 0x2B81F,
93 )
end
if mw.ustring.find( analyse, Patterns.CJK ) then
r = true
else
r = false
end
return r
end -- Text.containsCJK()
end -- Text.containsCJK()


Text.removeDelimited = function (s, prefix, suffix)

-- Remove all text in s delimited by prefix and suffix (inclusive)
-- Arguments:
-- s = string to process
-- prefix = initial delimiter
-- suffix = ending delimiter
-- Returns: stripped string
s = s and tostring(s) or ""
prefix = prefix and tostring(prefix) or ""
suffix = suffix and tostring(suffix) or ""
local prefixLen = mw.ustring.len(prefix)
local suffixLen = mw.ustring.len(suffix)
if prefixLen == 0 or suffixLen == 0 then
return s
end
local i = s:find(prefix, 1, true)
local r = s
local j
while i do
j = r:find(suffix, i + prefixLen)
if j then
r = r:sub(1, i - 1)..r:sub(j+suffixLen)
else
r = r:sub(1, i - 1)
end
i = r:find(prefix, 1, true)
end
return r
end


Text.getPlain = function ( adjust )
Text.getPlain = function ( adjust )
வரிசை 259: வரிசை 160:
-- adjust -- string
-- adjust -- string
-- Returns: string
-- Returns: string
local i = adjust:find( "<!--", 1, true )
local r = Text.removeDelimited(adjust,"<!--","-->")
local r = adjust
local j
while i do
j = r:find( "-->", i + 3, true )
if j then
r = r:sub( 1, i ) .. r:sub( j + 3 )
else
r = r:sub( 1, i )
end
i = r:find( "<!--", i, true )
end -- "<!--"
r = r:gsub( "(</?%l[^>]*>)", "" )
r = r:gsub( "(</?%l[^>]*>)", "" )
:gsub( "'''(.+)'''", "%1" )
:gsub( "'''", "" )
:gsub( "''(.+)''", "%1" )
:gsub( "''", "" )
:gsub( "&nbsp;", " " )
:gsub( "&nbsp;", " " )
return mw.text.unstrip( r )
return r
end -- Text.getPlain()
end -- Text.getPlain()


Text.isLatinRange = function (s)


Text.isLatinRange = function ( adjust )
-- Are characters expected to be latin or symbols within latin texts?
-- Are characters expected to be latin or symbols within latin texts?
-- Precondition:
-- Arguments:
-- adjust -- string, or nil for initialization
-- s = string to analyze
-- Returns: true, if valid for latin only
-- Returns: true, if valid for latin only
s = s and tostring(s) or "" --- ensure input is always string
local r
local PatternLatin = mw.loadData('Module:Text/data').PatternLatin
if not RangesLatin then
return mw.ustring.match(s, PatternLatin) ~= nil
RangesLatin = { { 0x07, 0x02AF },
{ 0x1D6B, 0x1D9A },
{ 0x1E00, 0x1EFF },
{ 0x2002, 0x203A },
{ 0x2190, 0x23BD } }
end
if not Patterns.Latin then
local range
Patterns.Latin = "^["
for i = 1, #RangesLatin do
range = RangesLatin[ i ]
Patterns.Latin = Patterns.Latin ..
mw.ustring.char( range[ 1 ], 45, range[ 2 ] )
end -- for i
Patterns.Latin = Patterns.Latin .. "]*$"
end
if adjust then
if mw.ustring.match( adjust, Patterns.Latin ) then
r = true
else
r = false
end
end
return r
end -- Text.isLatinRange()
end -- Text.isLatinRange()






Text.isQuote = function ( ask )
Text.isQuote = function ( s )
-- Is this character any quotation mark?
-- Is this character any quotation mark?
-- Parameter:
-- Parameter:
-- ask -- string, with single character
-- s = single character to analyze
-- Returns: true, if ask is quotation mark
-- Returns: true, if s is quotation mark
s = s and tostring(s) or ""
local r
if not SeekQuote then
if s == "" then
return false
SeekQuote = mw.ustring.char( 34, -- "
39, -- '
171, -- laquo
187, -- raquo
8216, -- lsquo
8217, -- rsquo
8218, -- sbquo
8220, -- ldquo
8221, -- rdquo
8222, -- bdquo
8249, -- lsaquo
8250, -- rsaquo
0x300C, -- CJK
0x300D, -- CJK
0x300E, -- CJK
0x300F ) -- CJK
end
end
local SeekQuote = mw.loadData('Module:Text/data').SeekQuote
if ask == "" then
return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil
r = false
elseif mw.ustring.find( SeekQuote, ask, 1, true ) then
r = true
else
r = false
end
return r
end -- Text.isQuote()
end -- Text.isQuote()


வரிசை 357: வரிசை 201:
-- adapt -- string (optional); format including "%s"
-- adapt -- string (optional); format including "%s"
-- Returns: string
-- Returns: string
return mw.text.listToText(trimAndFormat(args, adapt))
local collect = { }
for k, v in pairs( args ) do
if type( k ) == "number" then
v = mw.text.trim( v )
if v ~= "" then
if adapt then
v = mw.ustring.format( adapt, v )
end
table.insert( collect, v )
end
end
end -- for k, v
return mw.text.listToText( collect )
end -- Text.listToText()
end -- Text.listToText()


வரிசை 381: வரிசை 213:
-- advance -- number, with level 1 or 2, or nil
-- advance -- number, with level 1 or 2, or nil
-- Returns: quoted string
-- Returns: quoted string
apply = apply and tostring(apply) or ""
local mode, slang
local mode, slang
if type( alien ) == "string" then
if type( alien ) == "string" then
slang = mw.text.trim( alien ):lower()
slang = mw.text.trim( alien ):lower()
else
else
local pageLang = mw.title.getCurrentTitle().pageLanguage
slang = mw.title.getCurrentTitle().pageLanguage
if pageLang then
if not slang then
slang = pageLang.code
-- TODO FIXME: Introduction expected 2017-04
else
slang = mw.language.getContentLanguage():getCode()
slang = mw.language.getContentLanguage():getCode()
end
end
வரிசை 409: வரிசை 241:
-- advance -- number, with level 1 or 2, or nil
-- advance -- number, with level 1 or 2, or nil
-- Returns: string; possibly quoted
-- Returns: string; possibly quoted
local r = mw.text.trim( apply )
local r = mw.text.trim( apply and tostring(apply) or "" )
local s = mw.ustring.sub( r, 1, 1 )
local s = mw.ustring.sub( r, 1, 1 )
if s ~= "" and not Text.isQuote( s, advance ) then
if s ~= "" and not Text.isQuote( s, advance ) then
வரிசை 429: வரிசை 261:
-- or basic greek or cyrillic or symbols etc.
-- or basic greek or cyrillic or symbols etc.
local cleanup, decomposed
local cleanup, decomposed
local PatternCombined = mw.loadData('Module:Text/data').PatternCombined
if not Patterns.Combined then
Patterns.Combined = mw.ustring.char( 91,
decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" )
cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" )
0x0300, 45, 0x036F,
0x1AB0, 45, 0x1AFF,
0x1DC0, 45, 0x1DFF,
0xFE20, 45, 0xFE2F,
93 )
end
decomposed = mw.ustring.toNFD( adjust )
cleanup = mw.ustring.gsub( decomposed, Patterns.Combined, "" )
return mw.ustring.toNFC( cleanup )
return mw.ustring.toNFC( cleanup )
end -- Text.removeDiacritics()
end -- Text.removeDiacritics()



Text.removeWhitespace = function ( adjust )
    -- Remove invisible characters, and replace exotic spaces with
    -- an ASCII space
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; modified and trimmed
    local r = mw.text.decode( adjust )
    if r:find( "&", 1, true ) then
        -- Named entities that survived decoding
        r = r:gsub( "&lrm;", "" )
             :gsub( "&rlm;", "" )
             :gsub( "&zwj;", "" )
             :gsub( "&zwnj;", "" )
             :gsub( "&thinsp;", " " )
             :gsub( "&ensp;", " " )
             :gsub( "&emsp;", " " )
    end
    if not Patterns.Whitespace then
        -- Each pattern is ONE character class: 91 = "[", 45 = "-",
        -- 93 = "]".  A concatenation of several items would only
        -- match those characters appearing in exactly that order.
        Patterns.Whitespace = mw.ustring.char( 91,
                                               0x00AD,
                                               0x200C, 45, 0x200F,
                                               0x2028, 45, 0x202E,
                                               0x205F,
                                               0x2060,
                                               93 )
        Patterns.Space = mw.ustring.char( 91,
                                          0x00A0,
                                          0x1680,
                                          0x2000, 45, 0x200A,
                                          0x202F,
                                          0x205F,
                                          0x3000,
                                          0x303F,
                                          93 )
    end
    -- Removal runs first, so a character listed in both classes
    -- is deleted rather than turned into a space
    r = mw.ustring.gsub( r, Patterns.Whitespace, "" )
    r = mw.ustring.gsub( r, Patterns.Space, " " )
    return mw.text.trim( r )
end -- Text.removeWhitespace()




வரிசை 486: வரிசை 275:
-- analyse -- string
-- analyse -- string
-- Returns: true, if sentence terminated
-- Returns: true, if sentence terminated
local r = mw.text.trim( analyse )
local r
local PatternTerminated = mw.loadData('Module:Text/data').PatternTerminated
local lt = r:find( "<", 1, true )
if mw.ustring.find( analyse, PatternTerminated ) then
if not Patterns.Terminated then
Patterns.Terminated = mw.ustring.char( 91,
0x3002,
0xFF01,
0xFF0E,
0xFF1F )
.. "!%.%?…][\"'%]‹›«»‘’“”]*$"
end
if lt then
r = r:gsub( "</span>", "" )
end
if mw.ustring.find( r, Patterns.Terminated ) then
r = true
r = true
elseif lt then
local s = "<bdi[^>]* dir=\"([lr]t[rl])\".+</bdi></bdo>"
s = r:match( s )
if s then
if mw.language.getContentLanguage():isRTL() then
r = ( s == "ltr" )
else
r = ( s == "rtl" )
end
else
r = false
end
else
else
r = false
r = false
வரிசை 521: வரிசை 287:




Text.tokenWords = function ( adjust )
Text.ucfirstAll = function ( adjust)
-- Split text in words of digits or letters
-- Precondition:
-- adjust -- string
-- Returns: string with
local r = mw.uri.decode( adjust, "WIKI" )
if r:find( "&", 1, true ) then
r = mw.text.decode( r )
end
r = Text.removeWhitespace( r )
r = mw.ustring.gsub( r, "[%p%s]+", " " )
return r
end -- Text.tokenWords()



Text.ucfirstAll = function ( adjust )
-- Capitalize all words
-- Capitalize all words
-- Precondition:
-- Arguments:
-- adjust -- string
-- adjust = string to adjust
-- Returns: string with all first letters in upper case
-- Returns: string with all first letters in upper case
local r = " " .. adjust
adjust = adjust and tostring(adjust) or ""
local r = mw.text.decode(adjust,true)
local i = 1
local i = 1
local c, j, m
local c, j, m
if adjust:find( "&" ) then
m = (r ~= adjust)
r = r:gsub( "&amp;", "&#38;" )
r = " "..r
:gsub( "&lt;", "&#60;" )
:gsub( "&gt;", "&#62;" )
:gsub( "&nbsp;", "&#160;" )
:gsub( "&thinsp;", "&#8201;" )
:gsub( "&zwnj;", "&#8204;" )
:gsub( "&zwj;", "&#8205;" )
:gsub( "&lrm;", "&#8206;" )
:gsub( "&rlm;", "&#8207;" )
m = true
end
while i do
while i do
i = mw.ustring.find( r, "%W%l", i )
i = mw.ustring.find( r, "%W%l", i )
வரிசை 571: வரிசை 312:
r = r:sub( 2 )
r = r:sub( 2 )
if m then
if m then
r = mw.text.encode(r)
r = r:gsub( "&#38;", "&amp;" )
:gsub( "&#60;", "&lt;" )
:gsub( "&#62;", "&gt;" )
:gsub( "&#160;", "&nbsp;" )
:gsub( "&#8201;", "&thinsp;" )
:gsub( "&#8204;", "&zwnj;" )
:gsub( "&#8205;", "&zwj;" )
:gsub( "&#8206;", "&lrm;" )
:gsub( "&#8207;", "&rlm;" )
:gsub( "&#X(%x+);", "&#x%1;" )
end
end
return r
return r
end -- Text.ucfirstAll()
end -- Text.ucfirstAll()





வரிசை 594: வரிசை 325:
-- Returns: string with non-latin parts enclosed in <span>
-- Returns: string with non-latin parts enclosed in <span>
local r
local r
local data = mw.loadData('Module:Text/data')
Text.isLatinRange()
local PatternLatin = data.PatternLatin
if mw.ustring.match( adjust, Patterns.Latin ) then
local RangesLatin = data.RangesLatin
local NumLatinRanges = data.NumLatinRanges
if mw.ustring.match( adjust, PatternLatin ) then
-- latin only, horizontal dashes, quotes
-- latin only, horizontal dashes, quotes
r = adjust
r = adjust
else
else
local c
local c
local e = mw.html.create( "span" )
local j = false
local j = false
local k = 1
local k = 1
local m = false
local m = false
local n = mw.ustring.len( adjust )
local n = mw.ustring.len( adjust )
local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
local p
local flat = function ( a )
local flat = function ( a )
-- isLatin
-- isLatin
local range
local range
for i = 1, #RangesLatin do
-- NumLatinRanges has to be precomputed because # does not work from loadData
for i = 1, NumLatinRanges do
range = RangesLatin[ i ]
range = RangesLatin[ i ]
if a >= range[ 1 ] and a <= range[ 2 ] then
if a >= range[ 1 ] and a <= range[ 2 ] then
வரிசை 627: வரிசை 361:
end -- focus()
end -- focus()
local form = function ( a )
local form = function ( a )
return string.format( p,
return string.format( span,
r,
r,
mw.ustring.sub( adjust, k, j - 1 ),
mw.ustring.sub( adjust, k, j - 1 ),
mw.ustring.sub( adjust, j, a ) )
mw.ustring.sub( adjust, j, a ) )
end -- form()
end -- form()
e:attr( "dir", "auto" )
:css( "font-style", "normal" )
:wikitext( "%s" )
p = "%s%s" .. tostring( e )
r = ""
r = ""
for i = 1, n do
for i = 1, n do
வரிசை 687: வரிசை 417:
return r
return r
end -- Text.uprightNonlatin()
end -- Text.uprightNonlatin()



Failsafe.failsafe = function ( atleast )
-- Retrieve versioning and check for compliance
-- Precondition:
-- atleast -- string, with required version
-- or one of the keywords "wikidata", "item", "~", "@"
-- or false
-- Postcondition:
-- Returns string -- with queried version/item, also if problem
-- false -- if appropriate
-- 2024-03-01
local since = atleast
local last = ( since == "~" )
local linked = ( since == "@" )
local link = ( since == "item" )
local r
if last or link or linked or since == "wikidata" then
-- Keyword request: answered from the Wikidata item, if one is set
local item = Failsafe.item
since = false
if type( item ) == "number" and item > 0 then
local suited = string.format( "Q%d", item )
if link then
-- "item": just report the item identifier
r = suited
else
local entity = mw.wikibase.getEntity( suited )
if type( entity ) == "table" then
local seek = Failsafe.serialProperty or "P348"
local vsn = entity:formatPropertyValues( seek )
if type( vsn ) == "table" and
type( vsn.value ) == "string" and
vsn.value ~= "" then
if last and vsn.value == Failsafe.serial then
-- "~": the property value equals the local serial
r = false
elseif linked then
-- "@": false if the current page is the sitelink of the item
if mw.title.getCurrentTitle().prefixedText
== mw.wikibase.getSitelink( suited ) then
r = false
else
r = suited
end
else
r = vsn.value
end
end
end
end
elseif link then
-- "item" requested, but no usable item number configured
r = false
end
end
-- Nothing decided above: compare the required version against the
-- local serial (string comparison) or simply report the serial
if type( r ) == "nil" then
if not since or since <= Failsafe.serial then
r = Failsafe.serial
else
r = false
end
end
return r
end -- Failsafe.failsafe()





வரிசை 753: வரிசை 422:
local r
local r
if about == "quote" then
if about == "quote" then
factoryQuote()
data = mw.loadData('Module:Text/data')
r = { QuoteLang = Text.quoteLang,
r = { }
QuoteType = Text.quoteType }
r.QuoteLang = data.QuoteLang
r.QuoteType = data.QuoteType
end
end
return r
return r
end -- Text.test()
end -- Text.test()


-- Non Unicode-aware version of mw.text.split and mw.text.gsplit
-- based on [[phab:diffusion/ELUA/browse/master/includes/Engines/LuaCommon/lualib/mw.text.lua]]
-- These run up to 60 times faster than the Unicode-aware versions
Text.split = function ( text, pattern, plain )
    -- Split text at every match of pattern (byte-oriented)
    -- Parameter:
    --     text     -- string to split
    --     pattern  -- separator; Lua pattern unless plain is set
    --     plain    -- true, to take pattern literally
    -- Returns: table (sequence) with the pieces from Text.gsplit()
    local pieces = { }
    local count = 0
    for piece in Text.gsplit( text, pattern, plain ) do
        count = count + 1
        pieces[ count ] = piece
    end
    return pieces
end


-- Iterator counterpart of Text.split(): each call of the returned
-- function yields the next piece of text, and nothing once exhausted.
--     text     -- string to split
--     pattern  -- separator; Lua pattern unless plain is set
--     plain    -- passed to string.find() as its "plain" flag
Text.gsplit = function ( text, pattern, plain )
-- s: byte position where the next piece starts, nil when finished
-- l: byte length of the whole text
local s, l = 1, string.len( text )
return function ()
if s then
local e, n = string.find( text, pattern, s, plain )
local ret
if not e then
-- No further separator: the rest is the last piece
ret = string.sub( text, s )
s = nil
elseif n < e then
-- Empty separator!
-- Hand out one byte at a time so the iteration still advances
ret = string.sub( text, s, e )
if e < l then
s = e + 1
else
s = nil
end
else
-- Piece before the separator; '' if the separator starts at s
ret = e > s and string.sub( text, s, e - 1 ) or ''
s = n + 1
end
return ret
end
end, nil, nil
end


-- Export
-- Export
local p = { }
local p = { }

do
    -- Template wrappers around the library functions; each one passes
    -- the first unnamed #invoke argument (or "" if absent).

    -- Predicates answer "1" for true and "" for false
    local predicates = { 'containsCJK', 'isLatinRange', 'isQuote', 'sentenceTerminated' }
    for _, name in ipairs( predicates ) do
        p[ name ] = function ( frame )
            if Text[ name ]( frame.args[ 1 ] or "" ) then
                return "1"
            end
            return ""
        end
    end

    -- Converters hand the library result through unchanged
    local converters = { 'getPlain', 'removeDiacritics', 'ucfirstAll', 'uprightNonlatin' }
    for _, name in ipairs( converters ) do
        p[ name ] = function ( frame )
            return Text[ name ]( frame.args[ 1 ] or "" )
        end
    end
end


function p.char( frame )
function p.char( frame )
வரிசை 774: வரிசை 491:
end
end
if story then
if story then
local items = mw.text.split( story, "%s+" )
local items = mw.text.split( mw.text.trim(story), "%s+" )
if #items > 0 then
if #items > 0 then
local j
local j
lenient = ( params.errors == "0" )
lenient = (yesNo(params.errors) == false)
codes = { }
codes = { }
multiple = tonumber( params[ "*" ] )
multiple = tonumber( params[ "*" ] )
for k, v in pairs( items ) do
for _, v in ipairs( items ) do
if v:sub( 1, 1 ) == "x" then
j = tonumber((v:sub( 1, 1 ) == "x" and "0" or "") .. v)
j = tonumber( "0" .. v )
table.insert( codes, j or v )
elseif v == "" then
end
v = false
else
j = tonumber( v )
end
if v then
table.insert( codes, j or v )
end
end -- for k, v
end
end
end
end
வரிசை 813: வரிசை 522:
frame.args.format )
frame.args.format )
end
end

function p.containsCJK( frame )
return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or ""
end

function p.getPlain( frame )
return Text.getPlain( frame.args[ 1 ] or "" )
end

function p.isLatinRange( frame )
return Text.isLatinRange( frame.args[ 1 ] or "" ) and "1" or ""
end

function p.isQuote( frame )
return Text.isQuote( frame.args[ 1 ] or "" ) and "1" or ""
end





வரிசை 856: வரிசை 548:
result_line = pformat
result_line = pformat
for j = 1, #lists do
for j = 1, #lists do
result_line = mw.ustring.gsub( result_line,
result_line = mw.ustring.gsub(result_line, "%%s", lists[j][i], 1)
"%%s",
lists[ j ][ i ],
1 )
end
end
result = result .. result_line
result = result .. result_line
வரிசை 913: வரிசை 602:
tonumber( frame.args[3] ) )
tonumber( frame.args[3] ) )
end
end



function p.removeDiacritics( frame )
return Text.removeDiacritics( frame.args[ 1 ] or "" )
end

function p.sentenceTerminated( frame )
return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
end

function p.tokenWords( frame )
return Text.tokenWords( frame.args[ 1 ] or "" )
end

function p.ucfirstAll( frame )
return Text.ucfirstAll( frame.args[ 1 ] or "" )
end

function p.unstrip( frame )
return mw.text.trim( mw.text.unstrip( frame.args[ 1 ] or "" ) )
end

function p.uprightNonlatin( frame )
return Text.uprightNonlatin( frame.args[ 1 ] or "" )
end





வரிசை 983: வரிசை 645:




function p.split(frame)
    -- Template access to Text.split(): returns a single piece
    --     text    / 1  -- string to split (default: '')
    --     pattern / 2  -- separator (default: '')
    --     plain   / 3  -- yes/no flag, evaluated by Module:Yesno
    --     index   / 4  -- piece to return (default: 1);
    --                     negative values count from the end
    local args = frame.args
    local text = args.text or args[1] or ''
    local pattern = args.pattern or args[2] or ''
    local plain = yesNo(args.plain or args[3])
    local index = tonumber(args.index) or tonumber(args[4]) or 1
    local parts = Text.split(text, pattern, plain)
    if index < 0 then
        index = #parts + index + 1
    end
    return parts[index]
end


p.failsafe = function ( frame )
    -- Versioning interface
    -- Parameter:
    --     frame  -- frame object (first unnamed argument is used),
    --               or string with the request itself
    -- Returns: result of Failsafe.failsafe(), or "" instead of false
    local since
    local kind = type( frame )
    if kind == "table" then
        since = frame.args[ 1 ]
    elseif kind == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        -- A blank request counts as no request at all
        if since == "" then
            since = false
        end
    end
    return Failsafe.failsafe( since ) or ""
end -- p.failsafe()


-- Versioning interface: reports the serial stored in the Text table.
-- Takes no arguments; a frame passed by #invoke is ignored.
function p.failsafe()
return Text.serial
end




வரிசை 1,007: வரிசை 664:
return Text
return Text
end -- p.Text
end -- p.Text

setmetatable( p, { __call = function ( func, ... )
setmetatable( p, nil )
return Failsafe
end } )


return p
return p

07:21, 26 மார்ச்சு 2025 இல் கடைசித் திருத்தம்

Documentation for this module may be created at Module:Text/doc

local yesNo = require("Module:Yesno")
-- Library table; the Text.* functions defined below are attached to it.
-- serial looks like a date-style version stamp and suite like the
-- module's own name -- NOTE(review): confirm how callers use them.
local Text = { serial = "2022-07-21",
               suite  = "Text" }
--[=[
Text utilities
]=]

local function fiatQuote( apply, alien, advance )
    -- Wrap text in the quotation marks configured for a language
    -- Parameter:
    --     apply    -- text to quote; nil/false become "", any other
    --                 non-string is converted with tostring()
    --     alien    -- string, with language code; anything that is
    --                 not a string falls back to "en"
    --     advance  -- number, with level 1 or 2; any other value
    --                 leaves the text unquoted
    -- Returns: string; quoted if Module:Text/data defines marks for
    --          the resolved language and level, else the plain text
    local r = apply and tostring( apply ) or ""
    if type( alien ) ~= "string" then
        -- alien:match() below would raise on a non-string
        alien = "en"
    end
    advance = tonumber( advance ) or 0
    local data = mw.loadData( 'Module:Text/data' )
    local QuoteLang = data.QuoteLang
    local QuoteType = data.QuoteType
    -- Base language of a code with subtag, e.g. "de" for "de-at"
    local slang = alien:match( "^(%l+)-" )
    local suite = QuoteLang[ alien ]
                  or slang and QuoteLang[ slang ]
                  or QuoteLang[ "en" ]
    if suite then
        local quotes = QuoteType[ suite ]
        if quotes then
            -- A third entry requests a no-break space inside the marks
            local space = quotes[ 3 ] and "&#160;" or ""
            quotes = quotes[ advance ]
            if quotes then
                -- Format the stringified text r, never the raw
                -- argument, which may be nil or a non-string
                r = mw.ustring.format( "%s%s%s%s%s",
                                       mw.ustring.char( quotes[ 1 ] ),
                                       space,
                                       r,
                                       space,
                                       mw.ustring.char( quotes[ 2 ] ) )
            end
        else
            mw.log( "fiatQuote() " .. suite )
        end
    end
    return r
end -- fiatQuote()



Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil;
    --                numeric strings are accepted, fractions are floored
    --     again   -- number of repetitions, or nil (default: 1)
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; "" if there is nothing to create, or an error
    --          <span> listing rejected entries unless accept is set
    local r = ""
    apply = type( apply ) == "table" and apply or { }
    again = math.floor( tonumber( again ) or 1 )
    if again < 1 then
        return ""
    end
    local bad   = { }
    local codes = { }
    for _, v in ipairs( apply ) do
        local n = tonumber( v )
        -- Control characters other than tab and line feed are refused,
        -- and so is anything beyond the last Unicode codepoint, which
        -- mw.ustring.char() would answer with a script error.
        if n and ( n >= 32 or n == 9 or n == 10 ) and n <= 0x10FFFF then
            table.insert( codes, math.floor( n ) )
        else
            table.insert( bad, tostring( v ) )
        end
    end
    if #bad > 0 then
        -- One bad entry invalidates the whole request
        if not accept then
            r = tostring( mw.html.create( "span" )
                            :addClass( "error" )
                            :wikitext( "bad codepoints: " .. table.concat( bad, " " ) ) )
        end
        return r
    end
    if #codes > 0 then
        r = mw.ustring.char( unpack( codes ) )
        if again > 1 then
            r = r:rep( again )
        end
    end
    return r
end -- Text.char()

local function trimAndFormat(args, fmt)
	-- Turn a list of values into trimmed, optionally formatted strings
	-- Arguments:
	--    args = sequence of values; a non-table value is treated as a one-item list
	--    fmt = format including "%s", applied to every kept item (optional)
	-- Returns: new sequence without the items that are empty after trimming
	local items = args
	if type(items) ~= 'table' then
		items = {items}
	end
	local kept = {}
	for _, raw in ipairs(items) do
		local text = mw.text.trim(tostring(raw))
		if text ~= "" then
			if fmt then
				text = mw.ustring.format(fmt, text)
			end
			kept[#kept + 1] = text
		end
	end
	return kept
end

Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; items are trimmed, empty ones are dropped
    return table.concat( trimAndFormat( args, adapt ), apply or "|" )
end -- Text.concatParams()



Text.containsCJK = function ( s )
    -- Detect whether the text holds at least one character matched
    -- by the CJK pattern of Module:Text/data
    -- Parameter:
    --     s  -- string; nil/false are treated as ""
    -- Returns: true, if CJK detected; else false
    local text = s and tostring(s) or ""
    local data = mw.loadData('Module:Text/data')
    if mw.ustring.find( text, data.PatternCJK ) then
        return true
    end
    return false
end -- Text.containsCJK()

Text.removeDelimited = function (s, prefix, suffix)
	-- Remove all text in s delimited by prefix and suffix (inclusive)
	-- Arguments:
	--    s = string to process
	--    prefix = initial delimiter (literal text)
	--    suffix = ending delimiter (literal text)
	-- Returns: stripped string; a prefix without matching suffix
	--          removes everything up to the end of the string
	s = s and tostring(s) or ""
	prefix = prefix and tostring(prefix) or ""
	suffix = suffix and tostring(suffix) or ""
	-- Byte lengths: string.find and string.sub below count bytes,
	-- so codepoint counts would misplace the cuts for non-ASCII delimiters
	local prefixLen = #prefix
	local suffixLen = #suffix
	if prefixLen == 0 or suffixLen == 0 then
		return s
	end
	local r = s
	local i = r:find(prefix, 1, true)
	while i do
		-- Plain search: a suffix such as "-->" contains pattern magic
		-- characters and would otherwise match a lone ">"
		local j = r:find(suffix, i + prefixLen, true)
		if j then
			r = r:sub(1, i - 1)..r:sub(j + suffixLen)
		else
			r = r:sub(1, i - 1)
		end
		-- Rescan from the start; joining the two parts may have
		-- formed a new prefix
		i = r:find(prefix, 1, true)
	end
	return r
end

Text.getPlain = function ( adjust )
    -- Strip wikisyntax from a string, leaving templates alone:
    -- HTML comments, tags, bold/italic apostrophes; &nbsp; becomes
    -- an ordinary space
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local plain = Text.removeDelimited(adjust,"<!--","-->")
    plain = plain:gsub( "(</?%l[^>]*>)", "" )
    -- Bold markers first, otherwise a single apostrophe is left over
    plain = plain:gsub( "'''", "" )
    plain = plain:gsub( "''", "" )
    plain = plain:gsub( "&nbsp;", " " )
    return plain
end -- Text.getPlain()

Text.isLatinRange = function (s)
    -- Check the text against the Latin pattern of Module:Text/data
    -- (Latin letters and symbols expected within Latin texts)
    -- Arguments:
    --  s = string to analyze; nil/false are treated as ""
    -- Returns: true, if valid for latin only; else false
    local text = s and tostring(s) or ""
    local data = mw.loadData('Module:Text/data')
    local hit = mw.ustring.match(text, data.PatternLatin)
    return hit ~= nil
end -- Text.isLatinRange()



Text.isQuote = function ( s )
    -- Look the character up in the quotation mark list of
    -- Module:Text/data
    -- Parameter:
    --     s = single character to analyze; nil/false are treated as ""
    -- Returns: true, if s is quotation mark; false otherwise,
    --          in particular for the empty string
    local mark = s and tostring(s) or ""
    if mark ~= "" then
        local SeekQuote = mw.loadData('Module:Text/data').SeekQuote
        -- Plain search, the mark is never read as a pattern
        local found = mw.ustring.find( SeekQuote, mark, 1, true )
        return found ~= nil
    end
    return false
end -- Text.isQuote()



Text.listToText = function ( args, adapt )
    -- Join list items the way mw.text.listToText() does, after
    -- trimming them and dropping the empty ones
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local items = trimAndFormat( args, adapt )
    return mw.text.listToText( items )
end -- Text.listToText()



Text.quote = function ( apply, alien, advance )
    -- Put text into the quotation marks of a language
    -- Parameter:
    --     apply    -- string, with text; nil/false are treated as ""
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local text = apply and tostring(apply) or ""
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- No code given: page language first, content language
        -- of the wiki if the title offers none.
        -- NOTE(review): confirm that the title object really has a
        -- field named pageLanguage and which type it holds.
        slang = mw.title.getCurrentTitle().pageLanguage
                or mw.language.getContentLanguage():getCode()
    end
    -- Everything but an explicit 2 means first level marks
    local mode = ( advance == 2 ) and 2 or 1
    return fiatQuote( mw.text.trim( text ), slang, mode )
end -- Text.quote()



Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text; nil/false are treated as ""
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; trimmed, and quoted unless it is empty or
    --          already begins or ends with a quotation mark
    local r = mw.text.trim( apply and tostring(apply) or "" )
    local s = mw.ustring.sub( r, 1, 1 )
    if s ~= ""  and  not Text.isQuote( s ) then
        -- Last character: sub( r, -1, 1 ) would be "" for every
        -- text longer than one character
        s = mw.ustring.sub( r, -1 )
        if not Text.isQuote( s ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()



Text.removeDiacritics = function ( adjust )
    -- Strip diacritical marks from a text
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    -- Method: decompose (NFD), delete everything matching the
    --         PatternCombined entry of the data page, recompose (NFC).
    local combining = mw.loadData('Module:Text/data').PatternCombined
    local story     = mw.ustring.toNFD( adjust and tostring( adjust ) or "" )
    -- Keep only the first result of gsub(); the count is not wanted
    local stripped  = mw.ustring.gsub( story, combining, "" )
    return mw.ustring.toNFC( stripped )
end -- Text.removeDiacritics()



Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string; nil or false count as empty string,
    --                 other values are converted by tostring()
    -- Returns: true, if sentence terminated; else false
    local PatternTerminated = mw.loadData('Module:Text/data').PatternTerminated
    -- Same input coercion as the other text functions of this module,
    -- so that a missing value does not raise an error in find()
    local story = analyse and tostring( analyse ) or ""
    return mw.ustring.find( story, PatternTerminated ) ~= nil
end -- Text.sentenceTerminated()



Text.ucfirstAll = function ( adjust)
    -- Turn the first letter of every word into upper case
    -- Arguments:
    --     adjust = string to adjust
    -- Returns: string with all first letters in upper case
    local source   = adjust and tostring( adjust ) or ""
    local decoded  = mw.text.decode( source, true )
    -- Remember whether decoding changed anything; only then the
    -- result gets encoded again
    local entities = ( decoded ~= source )
    -- A leading space lets the very first word match "%W%l", too
    local story = " " .. decoded
    local start = mw.ustring.find( story, "%W%l", 1 )
    while start do
        local at = start + 1    -- position of the lower case letter
        story = mw.ustring.sub( story, 1, start )
                .. mw.ustring.upper( mw.ustring.sub( story, at, at ) )
                .. mw.ustring.sub( story, at + 1 )
        start = mw.ustring.find( story, "%W%l", at )
    end -- while start
    story = story:sub( 2 )    -- drop the leading space again
    if entities then
        story = mw.text.encode( story )
    end
    return story
end -- Text.ucfirstAll()


Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    --          (dir='auto', font-style:normal); adjust itself if
    --          mw.ustring.match() finds PatternLatin in it
    -- NOTE(review): adjust is not coerced to a string as in sibling
    --     functions; a nil value presumably fails in mw.ustring.match()
    --     -- confirm against callers
    local r
    -- Pattern and code point ranges are read from the data subpage
    local data = mw.loadData('Module:Text/data')
    local PatternLatin = data.PatternLatin
    local RangesLatin = data.RangesLatin
    local NumLatinRanges = data.NumLatinRanges
    if mw.ustring.match( adjust, PatternLatin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        -- c: code point of the current character (reused as a
        --    one-character string within the backward scan below)
        local c
        -- j: index where the open non-latin run starts, or false
        local j    = false
        -- k: index of the first character not yet copied into r
        local k    = 1
        -- m: for a run opened by a code point in 880..1023, the index
        --    at which a latin character has to follow so that the run
        --    counts as a single greek letter; else false
        local m    = false
        -- n: length of adjust in characters
        local n    = mw.ustring.len( adjust )
        -- Format: result so far, plain text before run, wrapped run
        local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
        local flat = function ( a )
                  -- isLatin
                  -- Returns true if code point a lies within one of
                  -- the RangesLatin pairs; nothing (nil) otherwise
                  local range
                  -- NumLatinRanges has to be precomputed because # does not work from loadData
                  for i = 1, NumLatinRanges do
                      range = RangesLatin[ i ]
                      if a >= range[ 1 ]  and  a <= range[ 2 ] then
                          return true
                      end
                  end    -- for i
              end -- flat()
        local focus = function ( a )
                  -- char is not ambivalent
                  -- Returns true for code points above 64 except
                  -- 8192..8212 (U+2000..U+2014), and for '&' and '<';
                  -- all other code points are skipped by the main loop.
                  -- The local r shadows the result string r.
                  local r = ( a > 64 )
                  if r then
                      r = ( a < 8192  or  a > 8212 )
                  else
                      r = ( a == 38  or  a == 60 )    -- '&' '<'
                  end
                  return r
              end -- focus()
        local form = function ( a )
                -- Returns r, followed by the plain text k..j-1 and by
                -- the run j..a wrapped in the span; r is not assigned
                return string.format( span,
                                      r,
                                      mw.ustring.sub( adjust, k, j - 1 ),
                                      mw.ustring.sub( adjust, j, a ) )
              end -- form()
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if focus( c ) then
                if flat( c ) then
                    -- Latin character: closes an open run, if any
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                -- The run is dropped; its text stays
                                -- part of the pending plain text from k
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Walk back from i-1 over spaces and
                            -- opening parentheses; they are kept
                            -- outside of the span and collected in s
                            local nx = i - 1
                            local s  = ""
                            for ix = nx, 1, -1 do
                                c = mw.ustring.sub( adjust, ix, ix )
                                if c == " "  or  c == "(" then
                                    nx = nx - 1
                                    s  = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            -- Emit everything up to nx, then the
                            -- skipped characters; pending text now
                            -- starts at the current latin character
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    -- Non-latin character while no run is open:
                    -- a new run starts here
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                -- Ambivalent character after a greek start: shift the
                -- index expected for the next latin character
                m = m + 1
            end
        end    -- for i
        -- NOTE(review): with a run opened by a code point in 880..1023,
        --     m < n fails if m has reached n, e.g. for two such letters
        --     at the very end of the string; that run is then left
        --     unwrapped -- confirm whether m <= n was intended
        if j  and  ( not m  or  m < n ) then
            -- Run still open at the end: wrap it up to the last char
            r = form( n )
        else
            -- Append the remaining plain text
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()


Text.test = function ( about )
    -- Provide internal data for inspection
    -- Parameter:
    --     about  -- string; "quote" is the only topic supported
    -- Returns: for "quote", table with the QuoteLang and QuoteType
    --          entries of the data page; else nil
    local r
    if about == "quote" then
        -- Declared local, so that no global variable gets created
        local data = mw.loadData('Module:Text/data')
        r = { QuoteLang = data.QuoteLang,
              QuoteType = data.QuoteType }
    end
    return r
end -- Text.test()

-- Non Unicode-aware version of mw.text.split and mw.text.gsplit
-- based on [[phab:diffusion/ELUA/browse/master/includes/Engines/LuaCommon/lualib/mw.text.lua]]
-- These run up to 60 times faster than the Unicode-aware versions
Text.split = function ( text, pattern, plain )
	-- Collect all pieces delivered by Text.gsplit() in a sequence
	-- Parameter: same as Text.gsplit()
	-- Returns: table (sequence) of strings
	local pieces = {}
	local count = 0
	for piece in Text.gsplit( text, pattern, plain ) do
		count = count + 1
		pieces[ count ] = piece
	end
	return pieces
end

Text.gsplit = function ( text, pattern, plain )
	-- Byte-based iterator over the pieces of text between separators
	-- Parameter:
	--     text     -- string to be split
	--     pattern  -- string, with separator (Lua pattern unless plain)
	--     plain    -- true, if pattern is to be taken literally
	-- Returns: iterator function for a generic for loop
	local start = 1                    -- next search position; nil when done
	local total = string.len( text )
	local function nextPiece()
		if not start then
			return
		end
		local first, last = string.find( text, pattern, start, plain )
		if not first then
			-- No further separator: deliver the remainder and stop
			local rest = string.sub( text, start )
			start = nil
			return rest
		end
		if last < first then
			-- Empty separator: deliver the single byte at that position
			local piece = string.sub( text, start, first )
			if first < total then
				start = first + 1
			else
				start = nil
			end
			return piece
		end
		local piece = ''
		if first > start then
			piece = string.sub( text, start, first - 1 )
		end
		start = last + 1
		return piece
	end
	return nextPiece, nil, nil
end

-- Export
local p = { }

do
	-- Tests: the first unnamed argument (or "") is passed to the Text
	-- function of the same name; "1" is delivered for a true result
	-- and "" otherwise
	local tests = { 'containsCJK', 'isLatinRange', 'isQuote', 'sentenceTerminated' }
	for i = 1, #tests do
		local name = tests[ i ]
		p[ name ] = function ( frame )
			if Text[ name ]( frame.args[ 1 ] or "" ) then
				return "1"
			end
			return ""
		end
	end

	-- Conversions: the result of the Text function of the same name
	-- is delivered as it is
	local conversions = { 'getPlain', 'removeDiacritics', 'ucfirstAll', 'uprightNonlatin' }
	for i = 1, #conversions do
		local name = conversions[ i ]
		p[ name ] = function ( frame )
			return Text[ name ]( frame.args[ 1 ] or "" )
		end
	end
end

function p.char( frame )
    -- Template or #invoke access to Text.char()
    -- Parameter (of the parent frame, if it has a first unnamed
    -- argument; else of the frame itself):
    --     1       -- code points, separated by whitespace;
    --                an item starting with "x" is read as hexadecimal
    --     *       -- number, passed as second argument of Text.char()
    --     errors  -- if yesNo() gives false for it, the third argument
    --                of Text.char() becomes true
    -- Returns: whatever Text.char() returns
    local params = frame:getParent().args
    local story  = params[ 1 ]
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    local codes, lenient, multiple
    if story then
        local items = mw.text.split( mw.text.trim( story ), "%s+" )
        if #items > 0 then
            lenient  = ( yesNo( params.errors ) == false )
            multiple = tonumber( params[ "*" ] )
            codes    = { }
            for i = 1, #items do
                local item   = items[ i ]
                local prefix = ""
                if item:sub( 1, 1 ) == "x" then
                    -- "x41" becomes "0x41", which tonumber() accepts
                    prefix = "0"
                end
                -- Items that are no number are kept as strings
                codes[ #codes + 1 ] = tonumber( prefix .. item ) or item
            end -- for i
        end
    end
    return Text.char( codes, multiple, lenient )
end

function p.concatParams( frame )
    -- #invoke access to Text.concatParams()
    -- Parameter (frame.args):
    --     template   -- "1": take the items from the parent frame
    --     separator  -- passed as second argument
    --     format     -- passed as third argument
    -- Returns: whatever Text.concatParams() returns
    local own      = frame.args
    local template = own.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    local args = own
    if template then
        args = frame:getParent().args
    end
    return Text.concatParams( args, own.separator, own.format )
end


function p.listToFormat(frame)
    -- Fill a format string once for every row of parallel lists
    -- Parameter (frame.args):
    --     1, 2, ...  -- lists; each a string of items joined by sep
    --     format     -- string; the n-th "%s" receives the item of
    --                   the n-th list; surplus "%s" are left as they are
    --     sep        -- separator of items, used as pattern by
    --                   mw.text.split(); default ";"
    -- Returns: string; the filled formats of all rows, concatenated.
    --          Empty string, if there is no format or no list.
    --          Items missing in a shorter list are treated as "".
    local pformat = frame.args["format"]
    if type( pformat ) ~= "string" then
        -- Nothing to fill
        return ""
    end
    local sep = frame.args["sep"] or ";"

    -- Parse parameters: lists
    local lists = {}
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end

    -- Count the lists 1, 2, ... up to the first gap; the length
    -- operator is not reliable if there are gaps
    local numLists = 0
    while lists[numLists + 1] do
        numLists = numLists + 1
    end

    -- Split lists
    local maxListLen = 0
    for i = 1, numLists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end

    -- Generate result string
    local rows = {}
    for i = 1, maxListLen do
        local j = 0
        -- A function as replacement inserts every item literally, so
        -- "%" within an item has no special meaning; returning nil
        -- keeps a placeholder that has no list of its own
        local filled = mw.ustring.gsub(pformat, "%%s", function ()
            j = j + 1
            if j <= numLists then
                return lists[j][i] or ""
            end
            return nil
        end)
        rows[i] = filled
    end

    return table.concat(rows)
end



function p.listToText( frame )
    -- #invoke access to Text.listToText()
    -- Parameter (frame.args):
    --     template  -- "1": take the items from the parent frame
    --     format    -- passed as second argument
    -- Returns: whatever Text.listToText() returns
    local own      = frame.args
    local template = own.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    local args = own
    if template then
        args = frame:getParent().args
    end
    return Text.listToText( args, own.format )
end



function p.quote( frame )
    -- #invoke access to Text.quote()
    -- Parameter (frame.args):
    --     1  -- text; default ""
    --     2  -- language code; blank is handed on as false
    --     3  -- level, converted by tonumber()
    -- Returns: whatever Text.quote() returns
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        -- false for a blank code, else the trimmed code
        slang = ( slang ~= "" )  and  slang
    end
    return Text.quote( args[ 1 ] or "", slang, tonumber( args[ 3 ] ) )
end



function p.quoteUnquoted( frame )
    -- #invoke access to Text.quoteUnquoted()
    -- Parameter (frame.args):
    --     1  -- text; default ""
    --     2  -- language code; blank is handed on as false
    --     3  -- level, converted by tonumber()
    -- Returns: whatever Text.quoteUnquoted() returns
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        -- false for a blank code, else the trimmed code
        slang = ( slang ~= "" )  and  slang
    end
    return Text.quoteUnquoted( args[ 1 ] or "",
                               slang,
                               tonumber( args[ 3 ] ) )
end


function p.zip(frame)
    -- Interleave the items of several lists
    -- Parameter (frame.args):
    --     1, 2, ...        -- lists, as strings
    --     sep              -- default separator of items; default ""
    --     sep1, sep2, ...  -- separator of items for a single list
    --     isep             -- inserted between the items of one row
    --     osep             -- inserted between rows
    -- Returns: string
    local args       = frame.args
    local defaultsep = args["sep"] or ""
    local innersep   = args["isep"] or ""
    local outersep   = args["osep"] or ""
    local lists      = {}
    local seps       = {}

    -- Parse parameters
    for key, value in pairs(args) do
        local index = tonumber(key)
        if index then
            lists[index] = value
        elseif string.sub(key, 1, 3) == "sep" then
            local sepIndex = tonumber(string.sub(key, 4))
            if sepIndex then seps[sepIndex] = value end
        end
    end

    -- Split lists; a list without explicit separator uses the default
    local count      = #lists
    local maxListLen = 0
    for i = 1, count do
        local items = mw.text.split(lists[i], seps[i] or defaultsep)
        lists[i] = items
        if #items > maxListLen then maxListLen = #items end
    end

    -- Collect all fragments row by row and join them at the end
    local parts = {}
    for i = 1, maxListLen do
        if i ~= 1 then parts[#parts + 1] = outersep end
        for j = 1, count do
            if j ~= 1 then parts[#parts + 1] = innersep end
            parts[#parts + 1] = lists[j][i] or ""
        end
    end
    return table.concat(parts)
end


function p.split(frame)
	-- #invoke access to Text.split(), delivering a single piece
	-- Parameter (frame.args), named or by position:
	--     text    / 1  -- string to be split; default ''
	--     pattern / 2  -- separator; default ''
	--     plain   / 3  -- evaluated by yesNo()
	--     index   / 4  -- number of the piece; negative values count
	--                     from the end; default 1
	-- Returns: string, or nil if there is no such piece
	local args    = frame.args
	local text    = args.text or args[1] or ''
	local pattern = args.pattern or args[2] or ''
	local plain   = yesNo(args.plain or args[3])
	local index   = tonumber(args.index) or tonumber(args[4]) or 1
	local pieces  = Text.split(text, pattern, plain)
	if index < 0 then
		index = index + #pieces + 1
	end
	return pieces[index]
end


p.failsafe = function ()
    -- Returns: the serial field of the Text table
    local version = Text.serial
    return version
end


function p.Text()
    -- Returns: the Text table itself, for use by other Lua modules
    return Text
end -- p.Text

return p
-- Retrieved from "https://tamilar.wiki/w/index.php?title=Module:Text&oldid=232493"