Module:Webarchive: திருத்தங்களுக்கு இடையிலான வேறுபாடு
Jump to navigation
Jump to search
உள்ளடக்கம் நீக்கப்பட்டது உள்ளடக்கம் சேர்க்கப்பட்டது
sync from sandbox; |
|||
| வரிசை 1: | வரிசை 1: | ||
--[[ ---------------------------------- |
--[[ ---------------------------------- |
||
Lua module implementing the {{webarchive}} template. |
|||
A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}} |
|||
]] |
|||
require('Module:No globals'); |
|||
local p = {} |
|||
local getArgs = require ('Module:Arguments').getArgs; |
|||
local this_page = mw.title.getCurrentTitle(); |
|||
--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------

Module-level tables shared by the functions in this module. All of them start out empty here; the
trailing comments record which ones are expected to be populated from the ./data sub-module.

]]

local categories = {};						-- category names from ./data
local err_warn_msgs = {};					-- error and warning messages from ./data
local excepted_pages = {};
local prefixes = {};						-- service provider tail string prefixes from ./data
local services = {};						-- archive service provider data from ./data
local uncategorized_namespaces = {};		-- list of namespaces that we should not categorize
local uncategorized_subpages = {};			-- list of subpages that should not be categorized

local ulx = {};								-- Associative array to hold template data
local track = {};							-- Associative array to hold tracking categories


--[[--------------------------< G L O B A L   C O N F I G U R A T I O N   S E T T I N G S >--------------------

Tunable settings for the whole module.

]]

local maxurls = 10;							-- Max number of URLs allowed.
local tname = 'Webarchive'					-- name of calling template. Change if template rename.
local verifydates = 'yes'					-- See documentation. Set "no" to disable.
|||
--[[--------------------------< inlineError >-----------------------

Critical error. Render output completely in red. Add to tracking category.

	arg: name of the template parameter at fault; rendered as |arg= inside the message
	msg: explanatory text appended after the fixed "Check ... value." prefix

Side effect: sets track[categories.error] to 1.
Returns the HTML <span> string holding the complete error message.

]]

local function inlineError(arg, msg)
	track[categories.error] = 1
	return '<span style="font-size:100%" class="error citation-comment">Error in webarchive template: Check <code style="color:inherit; border:inherit; padding:inherit;">|' .. arg .. '=</code> value. ' .. msg .. '</span>'
end
||
--[[--------------------------< inlineRed >-----------------------

Render a text fragment in red, such as a warning as part of the final output.
Add tracking category.

	msg: text to render
	trackmsg: "warning" records categories.warning in the track table; "error" records
		categories.error; any other value (including nil) records nothing

Returns the HTML <span> string wrapping msg.

]]

local function inlineRed(msg, trackmsg)
	if trackmsg == "warning" then
		track[categories.warning] = 1;
	elseif trackmsg == "error" then
		track[categories.error] = 1;
	end

	return '<span style="font-size:100%" class="error citation-comment">' .. msg .. '</span>'
end
||
--[[--------------------------< base62 >-----------------------

Convert base-62 to base-10
Credit: https://de.wikipedia.org/wiki/Modul:Expr

Digit values: '0'-'9' are 0-9, 'A'-'Z' are 10-35, 'a'-'z' are 36-61.

	value: string of base-62 digits

Returns the decoded number; returns 1 when value is not a string or contains any character
outside the set [0-9A-Za-z] (including the empty string).

]]

local function base62( value )
	local r = 1																	-- default return value if input value is malformed

	if 'string' == type (value) and value:match( "^[0-9A-Za-z]+$" ) then			-- explicit ASCII set; %w would depend on the locale
		local k = 1																-- weight of the current digit (62^position)
		local c
		r = 0
		for i = #value, 1, -1 do												-- loop through all characters in value from ls digit to ms digit
			c = value:byte( i, i )
			if c >= 48 and c <= 57 then											-- character is digit 0-9
				c = c - 48
			elseif c >= 65 and c <= 90 then										-- character is ascii A-Z
				c = c - 55
			else																-- must be ascii a-z
				c = c - 61
			end
			r = r + c * k														-- accumulate this base62 character's value
			k = k * 62															-- bump for next
		end
	end
	return r
end
||
--[[--------------------------< tableLength >-----------------------

Given a 1-D table, return number of elements

Counts every key visited by pairs(), so it also works for tables with holes or non-numeric keys.
Returns 0 when T is not a table.

]]

local function tableLength(T)
	local count = 0

	if 'table' ~= type (T) then													-- pairs() would raise an error on a non-table
		return count
	end

	for _ in pairs(T) do
		count = count + 1
	end
	return count
end
||
| வரிசை 113: | வரிசை 113: | ||
--[[--------------------------< dateFormat >-----------------------

Given a date string, return its format: dmy, mdy, iso, ymd
If unable to determine return nil

The patterns are tried in a fixed order (iso, dmy, mdy, ymd); the first one that matches decides the
result.  The year captured by that pattern must be greater than 1900 and less than 2200, otherwise
nil is returned.
TODO: why 1900? shouldn't that be birth-of-internet year? why 2200? shouldn't that be current year?

]]

local function dateFormat(date)
	local patterns = {															-- ordered so that the result does not depend on pairs() traversal order
		{'iso', '(%d%d%d%d)%-%d%d%-%d%d'},
		{'dmy', '%d%d? +%a+ +(%d%d%d%d)'},
		{'mdy', '%a+ %d%d?, +(%d%d%d%d)'},
		{'ymd', '(%d%d%d%d) +%a+ %d%d?'},										-- TODO: not mos compliant; delete?
		};

	if 'string' ~= type (date) then
		return nil;
	end

	for _, pattern in ipairs (patterns) do										-- loop through the patterns table
		local y = mw.ustring.match (date, pattern[2]);							-- looking for a match; y is the captured year
		if y then																-- not nil when found
			y = tonumber (y);
			if 1900 < y and 2200 > y then
				return pattern[1];												-- format name
			end
			return nil;															-- year out of range
		end
	end

	return nil;																	-- no pattern matched
end
||
--[[--------------------------< makeDate >-----------------------

Given a zero-padded 4-digit year, 2-digit month and 2-digit day, return a full date in df format
df = mdy, dmy, iso, ymd

on entry, year, month, day are presumed to be correct for the date that they represent; all are required

Returns nil when year, month or day is missing or empty, or when df is not one of the four supported
format names; otherwise returns the string produced by the content language's formatDate().

]]

local function makeDate(year, month, day, df)
	local format = {															-- df name -> formatDate() format string
		['dmy'] = 'j F Y',
		['mdy'] = 'F j, Y',
		['ymd'] = 'Y F j',
		['iso'] = 'Y-m-d',
		};

	local missing = not year or '' == year or not month or '' == month or not day or '' == day;

	if missing or not format[df] then											-- explicit grouping: never hand a nil format or partial date to formatDate()
		return nil;
	end

	local date = table.concat ({year, month, day}, '-');						-- assemble iso format date
	return mw.getContentLanguage():formatDate (format[df], date);
end
|||
--[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------

Returns true if date is after 31 December 1899 (why is 1900 the min year? shouldn't the internet's date-of-birth
be min year?), not after today's date, and represents a valid date (29 February 2017 is not a valid date).  Applies
Gregorian leapyear rules.

all arguments are required; each may be a number or a numeric string.  Returns false when any of them
is missing, empty, non-numeric or not a whole number.

]]

local function is_valid_date (year, month, day)
	local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
	local month_length;
	local today = os.date ('*t');												-- fetch a table of current date parts

	local y = tonumber (year);													-- nil for nil, '' and non-numeric input
	local m = tonumber (month);
	local d = tonumber (day);

	if not (y and m and d) then
		return false;															-- something missing or not a number
	end

	if 0 ~= y % 1 or 0 ~= m % 1 or 0 ~= d % 1 then
		return false;															-- fractional date parts cannot index days_in_month
	end

	if 1900 > y or today.year < y or 1 > m or 12 < m then						-- year and month are within bounds	TODO: 1900?
		return false;
	end

	if (2==m) then																-- if February
		month_length = 28;														-- then 28 days unless
		if (0==(y%4) and (0~=(y%100) or 0==(y%400))) then						-- is a leap year?
			month_length = 29;													-- if leap year then 29 days in February
		end
	else
		month_length=days_in_month[m];
	end

	if 1 > d or month_length < d then											-- day is within bounds
		return false;
	end
																				-- here when date parts represent a valid date
	return os.time({['year']=y, ['month']=m, ['day']=d, ['hour']=0}) <= os.time();	-- date at midnight must be less than or equal to current date/time
end
||
| வரிசை 208: | வரிசை 214: | ||
--[[--------------------------< decodeWebciteDate >-----------------------

Given a URI-path to Webcite (eg. /67xHmVFWP) return the encoded date in df format

	path: URI path; the component after the first '/' is taken as the snapshot ID
	df: date format name passed through to makeDate()

Returns:
	"query" when the ID belongs to one of the URL forms that do not carry a base62 date
	the formatted date when the ID decodes to a valid date
	an inlineRed() error string otherwise (missing ID, ID that is not base62, invalid date)

]]

local function decodeWebciteDate(path, df)
	local id = mw.text.split(path, "/")[2];										-- first path component after the leading '/'

	if not id or '' == id then													-- nothing to decode
		return inlineRed(err_warn_msgs.date_err, 'error');
	end

	-- valid URL formats that are not base62

	-- http://www.webcitation.org/query?id=1138911916587475
	-- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	-- http://www.webcitation.org/1138911916587475
	-- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	-- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e

	if mw.ustring.find( id, "query", 1, true) or
		mw.ustring.find( id, "cache", 1, true) or
		mw.ustring.find( id, "getfile", 1, true) or
		tonumber(id) then
			return "query"
	end

	if not id:match ('^[0-9A-Za-z]+$') then										-- base62() answers 1 for malformed input, which would decode to a bogus 1970 date
		return inlineRed(err_warn_msgs.date_err, 'error');
	end

	-- base62 string -> number -> text -> first 10 characters -> number used as a timestamp in seconds -> a table of date parts
	local timestamp = tonumber (string.format("%d", base62(id)):sub(1,10));
	local dt = os.date('*t', timestamp);

	if not is_valid_date (dt.year, dt.month, dt.day) then
		return inlineRed(err_warn_msgs.date_err, 'error');
	end

	return makeDate(dt.year, dt.month, dt.day, df) or inlineRed (err_warn_msgs.date4, 'error');
end
|||
--[[--------------------------< decodeWaybackDate >-----------------------

Given a URI-path to Wayback (eg. /web/20160901010101/http://example.com )
or Library of Congress Web Archives (/all/20160901010101/http://example.com)
return the formatted date eg. "September 1, 2016" in df format
Handle non-digits in snapshot ID such as "re_" and "-" and "*"

Returns:
	"index" when the timestamp component is exactly "*" (eg. /web/*/http.. or /all/*/http..)
	the formatted date when the timestamp holds at least 8 digits that form a valid date
	an inlineRed() error string otherwise

]]

local function decodeWaybackDate(path, df)
	local msg;																	-- reserved for the warning messages described in the TODO notes below

	local snapdate = path:gsub ('^/all/', ''):gsub ('^/web/', ''):gsub ('^/', '');	-- remove leading '/all/', leading '/web/' or leading '/'
	snapdate = snapdate:match ('^[^/]+');										-- get timestamp; nil when the path has no such component

	if not snapdate then
		return inlineRed (err_warn_msgs.date2, 'error');
	end

	if snapdate == "*" then														-- eg. /web/*/http.. or /all/*/http..
		return "index"
	end

	snapdate = snapdate:gsub ('%a%a_%d?$', ''):gsub ('%-', '');					-- from date, remove any trailing "re_", dashes

	msg = '';
	if snapdate:match ('%*$') then												-- a trailing '*' causes calendar display at archive .org
		snapdate = snapdate:gsub ('%*$', '');									-- remove so not part of length calc later
--		msg = inlineRed(err_warn_msgs.ts_cal, 'warning');						-- TODO: enable this -- make a message
	end

	if not snapdate:match ('^%d+$') then										-- digits only; tonumber() would also accept hex and exponent forms
		return inlineRed (err_warn_msgs.date2, 'error');
	end

	local dlen = string.len(snapdate)
	if dlen < 8 then															-- we need 8 digits TODO: but shouldn't this be testing for 14 digits?
		return inlineRed (err_warn_msgs.date3, 'error');
	end

	local year, month, day = snapdate:match ('^(%d%d%d%d)(%d%d)(%d%d)');		-- no need for snapdatelong here

	if not is_valid_date (year, month, day) then
		return inlineRed(err_warn_msgs.date_err, 'error');
	end

	return makeDate(year, month, day, df) or inlineRed (err_warn_msgs.date7, 'error');

	-- TODO: enable this in place of the return above
	--	snapdate = makeDate(year, month, day, df);
	--	if snapdate then
	--		if 14 == dlen then
	--			return snapdate, msg;											-- return date with message if any
	--		else
	--			return snapdate, msg .. inlineRed(err_warn_msgs.ts_len, 'warning');	-- return date with warning message(s)
	--		end
	--	else
	--		return inlineRed (err_warn_msgs.date7, 'error');					-- return error message
	--	end
end
||
--[[--------------------------< decodeArchiveisDate >-----------------------

Given an Archive.is "long link" URI-path (e.g. /2016.08.28-144552/http://example.com)
return the date in df format (e.g. if df = dmy, return 28 August 2016)
Handles "." and "-" in snapshot date, so 2016.08.28-144552 is same as 20160828144552

Returns:
	"short link" when the path is '/' followed only by alphanumeric characters
	the formatted date when the snapshot date is 14 characters long after '.' and '-' are removed
	the formatted date plus, as a second return value, a warning string when it is shorter than that
	an inlineRed() error string when the snapshot date is too short, invalid, or cannot be formatted

]]

local function decodeArchiveisDate(path, df)
	if path:match ('^/%w+$') then												-- short form url path is '/' followed by some number of base 62 digits and nothing else
		return "short link"														-- e.g. http://archive.is/hD1qz
	end

	local stamp = path:match ('^[^/]*/([^/]*)') or '';							-- text between the first and second '/'; '' when the path has no such component
	local snapdate = stamp:gsub ('[%.%-]', '');									-- get snapshot date, e.g. 2016.08.28-144552; remove periods and hyphens

	local dlen = string.len(snapdate)
	if dlen < 8 then															-- we need 8 digits TODO: but shouldn't this be testing for 14 digits?
		return inlineRed (err_warn_msgs.date3, 'error');
	end

	local year, month, day = snapdate:match ('^(%d%d%d%d)(%d%d)(%d%d)');		-- anchored: the date must be the leading 8 digits

	if not is_valid_date (year, month, day) then
		return inlineRed(err_warn_msgs.date_err, 'error');
	end

	local fulldate = makeDate(year, month, day, df);							-- date in df format
	if not fulldate then
		return inlineRed (err_warn_msgs.date7, 'error');						-- return error message
	end

	if 14 == dlen then
		return fulldate;														-- return date
	end
	return fulldate, inlineRed(err_warn_msgs.ts_len, 'warning');				-- return date with warning message
end
||
--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Makes a wikilink; when both link and display text is provided, returns a wikilink in the form [[L|D]]; if only
link is provided, returns a wikilink in the form [[L]]; if link is omitted or empty, returns the display text,
or an empty string when that is omitted too.

When no_link is anything other than nil, no wikilink markup is produced: returns the display text if there is
any, else the link target, else an empty string.

]=]

local function make_wikilink (link, display, no_link)
	local has_link = link and ('' ~= link);
	local has_display = display and ('' ~= display);

	if nil ~= no_link then														-- plain text requested
		if has_display then														-- if there is display text
			return display;														-- return that
		end
		return link or '';														-- return the target article name or empty string
	end

	if not has_link then
		return display or '';													-- link not set so return the display text
	end

	if has_display then
		return table.concat ({'[[', link, '|', display, ']]'});
	end
	return table.concat ({'[[', link, ']]'});
end
|||
--[[--------------------------< serviceName >-----------------------

Given a domain extracted by mw.uri.new() (eg. web.archive.org) set tail string and service ID

	host: domain name; lowercased here, and a leading 'web.' or 'www.' is removed
	no_link: passed through to make_wikilink(); anything other than nil suppresses the wikilink

The host is looked up in the services table, first as an exact key, then as the longest key that
occurs in host at a position not preceded by a letter.  Fields of a services entry as read here:
	[1] prefix selector: false -> prefixes.at, true -> prefixes.atthe, anything else is used as the prefix itself
	[2], [3] link target and display text handed to make_wikilink()
	[4] service ID (defaults to 'other')
	[5] tracking category (defaults to categories.other)
	[6] optional text appended after the wikilink

Side effects: sets ulx.url1.service and ulx.url1.tail, and records the tracking category in track.
Returns nothing.

]]

local function serviceName(host, no_link)
	local tracking;
	local index;

	host = host:lower():gsub ('^web%.(.+)', '%1'):gsub ('^www%.(.+)', '%1');	-- lowercase, remove web. and www. subdomains

	if services[host] then
		index = host;
	else
		for k, _ in pairs (services) do
			local escaped = k:gsub ('%p', '%%%0');								-- escape every punctuation character so the key is matched literally
			if host:find ('%f[%a]' .. escaped) then
				if not index or #k > #index or (#k == #index and k < index) then	-- keep the longest (most specific) key; ties broken by name so pairs() order does not matter
					index = k;
				end
			end
		end
	end

	if index then
		local service = services[index];
		local out = {''};														-- empty string in [1] so that concatenated result has leading single space

		ulx.url1.service = service[4] or 'other';
		tracking = service[5] or categories.other;
																				-- build tail string
		if false == service[1] then												-- select prefix
			table.insert (out, prefixes.at);
		elseif true == service[1] then
			table.insert (out, prefixes.atthe);
		else
			table.insert (out, service[1]);
		end

		table.insert (out, make_wikilink (service[2], service[3], no_link));	-- add article wikilink
		if service[6] then														-- add tail postfix if it exists
			table.insert (out, service[6]);
		end

		ulx.url1.tail = table.concat (out, ' ');								-- put it all together; result has leading space character
	else																		-- here when unknown archive
		ulx.url1.service = 'other';
		tracking = categories.unknown;
		ulx.url1.tail = table.concat ({'', prefixes.at, host, inlineRed (err_warn_msgs.unknown_url)}, ' ');	-- TODO: call to inlineRed() does not specify 'error' or 'warning'; should it?
	end

	if tracking then															-- a nil key would raise 'table index is nil'
		track[tracking] = 1
	end
end
||
--[[--------------------------< parseExtraArgs >-----------------------

Parse numbered arguments starting at 2, such as url2..url10, date2..date10, title2..title10
	For example: {{webarchive |url=.. |url4=.. |url7=..}}
		Three url arguments not in numeric sequence (1..4..7).
		Function only processes arguments numbered 2 or greater (in this case 4 and 7)
		It creates numeric sequenced table entries like:
			urlx.url2.url = <argument value for url4>
			urlx.url3.url = <argument value for url7>
		Returns the number of URL arguments found numbered 2 or greater (in this case returns "2")

The date and title stored with each entry are read from the parameters that carry the same number as
the url parameter (url4 is paired with date4 and title4).  A missing date is replaced by the
err_warn_msgs.date_miss warning; a missing title is left nil.

	args: table of template arguments

]]

local function parseExtraArgs(args)
	local j = 2;																-- next sequential slot in ulx

	for i = 2, maxurls do
		local url = args["url" .. i];
		if url then
			local entry = {};
			entry["url"] = url;
			entry["date"] = args["date" .. i] or inlineRed (err_warn_msgs.date_miss, 'warning');	-- indexed by i so the date belongs to this url
			entry["title"] = args["title" .. i];								-- nil when not supplied

			ulx["url" .. j] = entry;
			j = j + 1
		end
	end

	return j - 2																-- number of extra urls stored; 0 when none
end
||
--[[--------------------------< comma >----------------------- |
--[[--------------------------< comma >----------------------- |
||
Given a date string, return "," if it's MDY |
|||
]] |
|||
local function comma(date) |
local function comma(date) |
||
return (date and date:match ('%a+ +%d%d?(,) +%d%d%d%d')) or ''; |
|||
local N = mw.text.split(date, " ") |
|||
local O = mw.text.split(N[1], "-") -- for ISO |
|||
if O[1] == "index" then return "" end |
|||
if not tonumber(O[1]) then |
|||
return "," |
|||
else |
|||
return "" |
|||
end |
|||
end |
end |
||
--[[--------------------------< createTracking >----------------------- |
--[[--------------------------< createTracking >----------------------- |
||
Return data in track[] ie. tracking categories |
|||
]] |
|||
local function createTracking() |
local function createTracking() |
||
if not excepted_pages[this_page.fullText] then -- namespace:title/fragment is allowed to be categorized (typically this module's / template's testcases page(s)) |
|||
if uncategorized_namespaces[this_page.nsText] then -- TODO: enable this chunk |
|||
return ''; -- this page not to be categorized so return empty string |
|||
end |
|||
for _,v in ipairs (uncategorized_subpages) do -- cycle through page name patterns |
|||
if this_page.text:match (v) then -- test page name against each pattern |
|||
return ''; -- this subpage type not to be categorized so return empty string |
|||
end |
|||
end |
|||
end |
|||
local out = {}; |
|||
if tableLength(track) > 0 then |
|||
for key, _ in pairs(track) do -- loop through table |
|||
table.insert (out, make_wikilink (key)); -- and convert category names to links |
|||
sand = sand .. "[[" .. key .. "]]" |
|||
end |
|||
end |
|||
return table.concat (out); -- concat into one big string; empty string if table is empty |
|||
return sand |
|||
end |
end |
||
--[[--------------------------< createRendering >----------------------- |
--[[--------------------------< createRendering >----------------------- |
||
Return a rendering of the data in ulx[][] |
|||
TODO: when archive date is '*' ('index') leading archive extlink should be [<url> Archive index] instead of |
|||
[<url> Archived] index; code to support this has been added but is commented out for the time being; look for TODO1 |
|||
]] |
|||
local function createRendering() |
local function createRendering() |
||
local displayfield |
|||
local out = {}; |
|||
local period1 = ''; -- For backwards compat with {{wayback}} |
|||
local period2 = '.'; |
|||
if 'none' == ulx.url1.format then -- For {{wayback}}, {{webcite}} |
|||
local period1 = "" -- For backwards compat with {{wayback}} |
|||
table.insert (out, '['); -- open extlink markup |
|||
local period2 = "." |
|||
table.insert (out, ulx.url1.url); -- add url |
|||
local indexstr = "archived" |
|||
if ulx.url1.date == "index" then |
|||
indexstr = "archive" |
|||
end |
|||
-- For {{wayback}}, {{webcite}} |
|||
if ulx.url1.title then |
|||
if ulx.url1.format == "none" then |
|||
table.insert (out, ' ') -- the required space |
|||
if not ulx.url1.title and not ulx.url1.date then -- No title. No date |
|||
table.insert (out, ulx.url1.title) -- the title |
|||
sand = "[" .. ulx.url1.url .. " Archived]" .. ulx.url1.tail |
|||
table.insert (out, ']'); -- close extlink markup |
|||
elseif not ulx.url1.title and ulx.url1.date then -- No title. Date. |
|||
table.insert (out, ulx.url1.tail); -- tail text |
|||
if ulx.url1.service == "wayback" then |
|||
if ulx.url1.date then |
|||
period1 = "." |
|||
table.insert (out, ' ('); -- open date text; TODO: why the html entity? |
|||
period2 = "" |
|||
table.insert (out, 'index' == ulx.url1.date and 'archive' or 'archived'); -- add text |
|||
end |
|||
table.insert (out, ' '); -- insert a space |
|||
sand = "[" .. ulx.url1.url .. " Archived] " .. ulx.url1.date .. comma(ulx.url1.date) .. ulx.url1.tail .. period1 |
|||
table.insert (out, ulx.url1.date); -- add date |
|||
table.insert (out, ')'); -- close date text |
|||
sand = "[" .. ulx.url1.url .. " " .. ulx.url1.title .. "]" .. ulx.url1.tail |
|||
end |
|||
elseif ulx.url1.title and ulx.url1.date then -- Title. Date. |
|||
else -- no title |
|||
sand = "[" .. ulx.url1.url .. " " .. ulx.url1.title .. "]" .. ulx.url1.tail .. " (" .. indexstr .. " " .. ulx.url1.date .. ")" |
|||
table.insert (out, ' Archived]') -- close extlink markup TODO1: remove this line |
|||
else |
|||
--TODO1 table.insert (out, 'index' == ulx.url1.date and ' Archive index]' or ' Archived]'); -- begin link label-- use this line for correct link label when date is 'index' |
|||
return nil |
|||
if ulx.url1.date then |
|||
end |
|||
if 'wayback' == ulx.url1.service then |
|||
if ulx.url1.extraurls > 0 then -- For multiple archive URLs |
|||
period1 = '.'; |
|||
local tot = ulx.url1.extraurls + 1 |
|||
period2 = ''; |
|||
sand = sand .. period2 .. " Additional archives: " |
|||
end |
|||
for i=2,tot do |
|||
table.insert (out, table.concat ({' ', ulx.url1.date})); -- add date TODO1: remove this line |
|||
local indx = "url" .. i |
|||
--[[TODO1 if 'index' ~= ulx.url1.date then -- TODO1: add this line -- use this if for correct link label when date is 'index' |
|||
if ulx[indx]["title"] then |
|||
table.insert (out, ulx.url1.date); -- add date TODO1: add this line -- use this if for correct link label when date is 'index' |
|||
displayfield = "title" |
|||
end -- TODO1: add this line -- use this if for correct link label when date is 'index' |
|||
else |
|||
]] table.insert (out, comma(ulx.url1.date)); -- add ',' if date format is mdy |
|||
displayfield = "date" |
|||
table.insert (out, ulx.url1.tail); -- add tail text |
|||
end |
|||
table.insert (out, period1); -- terminate |
|||
sand = sand .. "[" .. ulx[indx]["url"] .. " " .. ulx[indx][displayfield] .. "]" |
|||
else -- no date |
|||
if i == tot then |
|||
table.insert (out, ulx.url1.tail); -- add tail text |
|||
sand = sand .. "." |
|||
end |
|||
else |
|||
end |
|||
sand = sand .. ", " |
|||
end |
|||
end |
|||
else |
|||
return sand |
|||
end |
|||
return sand |
|||
-- For {{cite archives}} |
|||
if 0 < ulx.url1.extraurls then -- For multiple archive URLs |
|||
else |
|||
local tot = ulx.url1.extraurls + 1 |
|||
if ulx.url1.format == "addlarchives" then -- Multiple archive services |
|||
table.insert (out, period2); -- terminate first url |
|||
displayheader = "Additional archives: " |
|||
table.insert (out, ' Additional archives: '); -- add header text |
|||
else -- Multiple pages from the same archive |
|||
displayheader = "Additional pages archived on " .. ulx.url1.date .. ": " |
|||
for i=2, tot do -- loop through the additionals |
|||
end |
|||
local index = table.concat ({'url', i}); -- make an index |
|||
local tot = 1 + ulx.url1.extraurls |
|||
displayfield = ulx[index]['title'] and 'title' or 'date'; -- choose display text |
|||
local sand = displayheader |
|||
table.insert (out, '['); -- open extlink markup |
|||
for i=1,tot do |
|||
table.insert (out, ulx[index]['url']); -- add the url |
|||
local indx = "url" .. i |
|||
table.insert (out, ' '); -- the required space |
|||
displayfield = ulx[indx]["title"] |
|||
table.insert (out, ulx[index][displayfield]); -- add the label |
|||
if ulx.url1.format == "addlarchives" then |
|||
table.insert (out, ']'); -- close extlink markup |
|||
if not displayfield then |
|||
table.insert (out, i==tot and '.' or ', '); -- add terminator |
|||
displayfield = ulx[indx]["date"] |
|||
end |
|||
end |
|||
else |
|||
return table.concat (out); -- make a big string and done |
|||
if not displayfield then |
|||
displayfield = "Page " .. i |
|||
else -- For {{cite archives}} |
|||
end |
|||
if 'addlarchives' == ulx.url1.format then -- Multiple archive services |
|||
end |
|||
table.insert (out, 'Additional archives: '); -- add header text |
|||
sand = sand .. "[" .. ulx[indx]["url"] .. " " .. displayfield .. "]" |
|||
else -- Multiple pages from the same archive |
|||
if i == tot then |
|||
table.insert (out, 'Additional pages archived on '); -- add header text |
|||
sand = sand .. "." |
|||
table.insert (out, ulx.url1.date); -- add date to header text |
|||
else |
|||
table.insert (out, ': '); -- close header text |
|||
sand = sand .. ", " |
|||
end |
|||
end |
|||
local tot = ulx.url1.extraurls + 1; |
|||
return sand |
|||
for i=1, tot do -- loop through the additionals |
|||
end |
|||
local index = table.concat ({'url', i}); -- make an index |
|||
table.insert (out, '['); -- open extlink markup |
|||
table.insert (out, ulx[index]['url']); -- add url |
|||
table.insert (out, ' '); -- add required space |
|||
displayfield = ulx[index]['title']; |
|||
if 'addlarchives' == ulx.url1.format then |
|||
if not displayfield then |
|||
displayfield = ulx[index]['date'] |
|||
end |
|||
else -- must be addlpages |
|||
if not displayfield then |
|||
displayfield = table.concat ({'Page ', i}); |
|||
end |
|||
end |
|||
table.insert (out, displayfield); -- add title, date, page label text |
|||
table.insert (out, ']'); -- close extlink markup |
|||
table.insert (out, (i==tot and '.' or ', ')); -- add terminator |
|||
end |
|||
return table.concat (out); -- make a big string and done |
|||
end |
|||
end |
end |
||
function p.webarchive(frame) |
|||
args = frame.args |
|||
if (args[1]==nil) and (args["url"]==nil) then -- if no argument provided than check parent template/module args |
|||
args = frame:getParent().args |
|||
end |
|||
local tname = "Webarchive" -- name of calling template. Change if template rename. |
|||
ulx = {} -- Associative array to hold template data |
|||
track = {} -- Associative array to hold tracking categories |
|||
maxurls = 10 -- Max number of URLs allowed. |
|||
local verifydates = "yes" -- See documentation. Set "no" to disable. |
|||
--[[--------------------------< W E B A R C H I V E >---------------------------------------------------------- |
|||
-- URL argument (first) |
|||
template entry point |
|||
local url1 = trimArg(args.url) or trimArg(args.url1) |
|||
if not url1 then |
|||
return inlineError("url", "Empty.") .. createTracking() |
|||
end |
|||
if mw.ustring.find( url1, "https://web.http", 1, true ) then -- track bug |
|||
track["Category:Webarchive template errors"] = 1 |
|||
return inlineError("url", "https://web.http") .. createTracking() |
|||
end |
|||
if url1 == "https://web.archive.org/http:/" then -- track bug |
|||
track["Category:Webarchive template errors"] = 1 |
|||
return inlineError("url", "Invalid URL") .. createTracking() |
|||
end |
|||
TODO: deprecate empty |nolink= as a 'positive' assertion that archive service is not to be linked |
|||
ulx.url1 = {} |
|||
ulx.url1.url = url1 |
|||
if not mw.ustring.find( mw.ustring.lower(url1), "^http") then |
|||
if not mw.ustring.find( url1, "^//") then |
|||
ulx.url1.url = "http://" .. url1 |
|||
end |
|||
end |
|||
local uri1 = mw.uri.new(ulx.url1.url) |
|||
ulx.url1.host = uri1.host |
|||
ulx.url1.extraurls = parseExtraArgs() |
|||
]] |
|||
-- Nolink argument |
|||
local function webarchive(frame) |
|||
local args = getArgs (frame, { -- TODO: delete this assignment |
|||
valueFunc = function (key, value) -- this code so that we can detect and handle the oddity that is |nolink= |
|||
if 'nolink' == key then -- |nolink= is 'set' when present with or without assigned value; TODO: deprecate this peculiar use |
|||
return value; -- don't trim; we don't care (right now) what the value is except when nil and we can't trim nil |
|||
elseif value then -- all other values: if the value is not nil |
|||
value = mw.text.trim (value); -- trim whitespace |
|||
if '' ~= value then -- empty string when value was only whitespace or was empty |
|||
return value; -- return non-nil, non-empty values |
|||
end |
|||
end |
|||
return nil; -- value was nil, empty, or contained only whitespace |
|||
end -- end of valueFunc |
|||
}); |
|||
-- local args = getArgs (frame); -- TODO: replace the above with this |
|||
serviceName(uri1.host, nolink) |
|||
local data = mw.loadData (table.concat ({ -- make a data module name; sandbox or live |
|||
'Module:Webarchive/data', |
|||
frame:getTitle():find('sandbox', 1, true) and '/sandbox' or '' -- this instance is ./sandbox then append /sandbox |
|||
})); |
|||
categories = data.categories; -- fill in the forward declarations |
|||
err_warn_msgs = data.err_warn_msgs; |
|||
excepted_pages = data.excepted_pages; |
|||
prefixes = data.prefixes; |
|||
services = data.services; |
|||
uncategorized_namespaces = data.uncategorized_namespaces; |
|||
uncategorized_subpages = data.uncategorized_subpages; |
|||
local date, format, msg, uri, url; |
|||
-- Date argument |
|||
verifydates = 'yes' == verifydates; -- convert to boolean |
|||
if args.url and args.url1 then -- URL argument (first) |
|||
local date = trimArg(args.date) or trimArg(args.date1) |
|||
return inlineError("url", "Conflicting |url= and |url1=.") .. createTracking(); |
|||
if date == "*" and (ulx.url1.service == "wayback" or ulx.url1.service == "locwebarchives") then |
|||
end |
|||
date = "index" |
|||
elseif date and (ulx.url1.service == "wayback" or ulx.url1.service == "locwebarchives") and verifydates == "yes" then |
|||
url = args.url or args.url1; |
|||
local ldf = dateFormat(date) |
|||
if ldf then |
|||
if not url then |
|||
local udate = decodeWaybackDate( uri1.path, ldf ) |
|||
return inlineError("url", "Empty.") .. createTracking() |
|||
if udate ~= date then |
|||
end |
|||
date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning") |
|||
if mw.ustring.find( url, "https://web.http", 1, true ) then -- track bug - TODO: IAbot bug; not known if the bug has been fixed; deferred |
|||
end |
|||
track[categories.error] = 1; |
|||
end |
|||
return inlineError("url", "https://web.http") .. createTracking() |
|||
elseif date and ulx.url1.service == "webcite" and verifydates == "yes" then |
|||
end |
|||
local ldf = dateFormat(date) |
|||
if url == "https://web.archive.org/http:/" then -- track bug - TODO: IAbot bug; not known if the bug has been fixed; deferred |
|||
if ldf then |
|||
track[categories.error] = 1; |
|||
local udate = decodeWebciteDate( uri1.path, ldf ) |
|||
return inlineError("url", "Invalid URL") .. createTracking() |
|||
if udate == "query" then -- skip |
|||
end |
|||
elseif udate ~= date then |
|||
date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning") |
|||
end |
|||
end |
|||
elseif date and ulx.url1.service == "archiveis" and verifydates == "yes" then |
|||
local ldf = dateFormat(date) |
|||
if ldf then |
|||
local udate = decodeArchiveisDate( uri1.path, ldf ) |
|||
if udate == "short link" then -- skip |
|||
elseif udate ~= date then |
|||
date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning") |
|||
end |
|||
end |
|||
elseif not date and (ulx.url1.service == "wayback" or ulx.url1.service == "locwebarchives") then |
|||
date = decodeWaybackDate( uri1.path, "iso" ) |
|||
if not date then |
|||
date = inlineRed("[Date error] (1)", "error") |
|||
end |
|||
elseif not date and ulx.url1.service == "webcite" then |
|||
date = decodeWebciteDate( uri1.path, "iso" ) |
|||
if date == "query" then |
|||
date = inlineRed("[Date missing]", "warning") |
|||
elseif not date then |
|||
date = inlineRed("[Date error] (1)", "error") |
|||
end |
|||
elseif not date and ulx.url1.service == "archiveis" then |
|||
date = decodeArchiveisDate( uri1.path, "iso" ) |
|||
if date == "short link" then |
|||
date = inlineRed("[Date missing]", "warning") |
|||
elseif not date then |
|||
date = inlineRed("[Date error] (1)", "error") |
|||
end |
|||
elseif not date then |
|||
date = inlineRed("[Date missing]", "warning") |
|||
end |
|||
ulx.url1.date = date |
|||
ulx.url1 = {} |
|||
-- Format argument |
|||
ulx.url1.url = url |
|||
if not (url:lower():find ('^http') or url:find ('^//')) then -- TODO: is this a good idea? isn't it better to simply throw an error when url is malformed ... |
|||
ulx.url1.url = 'http://' .. url -- ... rather than apply this 'fix' that might not fix anything? |
|||
end |
|||
ulx.url1.extraurls = parseExtraArgs(args) |
|||
local format = trimArg(args.format) |
|||
if not format then |
|||
format = "none" |
|||
else |
|||
if format == "addlpages" then |
|||
if not ulx.url1.date then |
|||
format = "none" |
|||
end |
|||
elseif format == "addlarchives" then |
|||
format = "addlarchives" |
|||
else |
|||
format = "none" |
|||
end |
|||
end |
|||
ulx.url1.format = format |
|||
uri = mw.uri.new (ulx.url1.url); -- get a table of uri parts from this url |
|||
-- Title argument |
|||
serviceName(uri.host, args.nolink) |
|||
if args.date and args.date1 then -- Date argument |
|||
local title = trimArg(args.title) or trimArg(args.title1) |
|||
return inlineError("date", "Conflicting |date= and |date1=.") .. createTracking(); |
|||
ulx.url1.title = title |
|||
end |
|||
date = args.date or args.date1 |
|||
if 'wayback' == ulx.url1.service or 'locwebarchives' == ulx.url1.service then |
|||
local rend = createRendering() |
|||
if '*' == date then -- TODO: why is this not compared to url date? |
|||
if not rend then |
|||
date = 'index'; |
|||
rend = '<span style="font-size:100%" class="error citation-comment">Error in [[:Template:' .. tname .. ']]: Unknown problem. Please report on template talk page.</span>' |
|||
end |
|||
track["Category:Webarchive template errors"] = 1 |
|||
if date then |
|||
end |
|||
if verifydates then |
|||
local ldf = dateFormat(date) |
|||
if ldf then |
|||
local udate, msg = decodeWaybackDate( uri.path, ldf ) -- get the url date in the same format as date in |date=; 'index' when wayback date is * |
|||
if udate ~= date then |
|||
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning') .. (msg or ''); -- mismatch us url date; add message if there is one |
|||
else |
|||
date = date .. (msg or ''); -- add message if there is one |
|||
end |
|||
end |
|||
end |
|||
else -- no |date= |
|||
date, msg = decodeWaybackDate( uri.path, "iso" ) |
|||
if not date then |
|||
date = inlineRed (err_warn_msgs.date1, 'error'); -- TODO: change this type of message so that it identifies url as source of error? |
|||
else |
|||
date = date .. (msg or ''); -- add message if there is one |
|||
end |
|||
end |
|||
elseif 'webcite' == ulx.url1.service then |
|||
return rend .. createTracking() |
|||
if date then |
|||
if verifydates then |
|||
local ldf = dateFormat(date) |
|||
if ldf then |
|||
local udate = decodeWebciteDate( uri.path, ldf ) -- get the url date in the same format as date in |date= |
|||
if 'query' ~= udate then -- skip if query |
|||
if udate ~= date then |
|||
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning'); |
|||
end |
|||
end |
|||
end |
|||
end |
|||
else |
|||
date = decodeWebciteDate( uri.path, "iso" ) |
|||
if date == "query" then |
|||
date = inlineRed (err_warn_msgs.date_miss, 'warning'); |
|||
elseif not date then |
|||
date = inlineRed (err_warn_msgs.date1, 'error'); |
|||
end |
|||
end |
|||
elseif 'archiveis' == ulx.url1.service then |
|||
if date then |
|||
if verifydates then |
|||
local ldf = dateFormat(date) |
|||
if ldf then |
|||
local udate, msg = decodeArchiveisDate( uri.path, ldf ) -- get the url date in the same format as date in |date= |
|||
if 'short link' ~= udate then -- skip if short link |
|||
if udate ~= date then |
|||
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning') .. (msg or ''); -- mismatch: use url date; add message if there is one |
|||
else |
|||
date = date .. (msg or ''); -- add message if there is one |
|||
end |
|||
end |
|||
end |
|||
end |
|||
else -- no |date= |
|||
date, msg = decodeArchiveisDate( uri.path, "iso" ) |
|||
if date == "short link" then |
|||
date = inlineRed (err_warn_msgs.date_miss, 'warning'); |
|||
elseif not date then |
|||
date = inlineRed (err_warn_msgs.date1, 'error'); |
|||
else |
|||
date = date .. (msg or ''); -- add message if there is one |
|||
end |
|||
end |
|||
else -- some other service |
|||
if not date then |
|||
date = inlineRed (err_warn_msgs.date_miss, 'warning'); |
|||
end |
|||
end |
|||
ulx.url1.date = date |
|||
format = args.format; -- Format argument |
|||
if not format then |
|||
format = "none" |
|||
else |
|||
if format == "addlpages" then |
|||
if not ulx.url1.date then |
|||
format = "none" |
|||
end |
|||
elseif format == "addlarchives" then |
|||
format = "addlarchives" |
|||
else |
|||
format = "none" |
|||
end |
|||
end |
|||
ulx.url1.format = format |
|||
if args.title and args.title1 then -- Title argument |
|||
return inlineError("title", "Conflicting |title= and |title1=.") .. createTracking(); |
|||
end |
|||
ulx.url1.title = args.title or args.title1; |
|||
local rend = createRendering() |
|||
if not rend then |
|||
rend = '<span style="font-size:100%" class="error citation-comment">Error in [[:Template:' .. tname .. ']]: Unknown problem. Please report on template talk page.</span>' |
|||
track[categories.error] = 1; |
|||
end |
|||
return rend .. createTracking() |
|||
end |
end |
||
return p |
|||
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------ |
|||
]] |
|||
return {webarchive = webarchive}; |
|||
11:01, 9 செப்டெம்பர் 2018 இல் நிலவும் திருத்தம்
Documentation for this module may be created at Module:Webarchive/doc
--[[ ----------------------------------
Lua module implementing the {{webarchive}} template.
A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}}
]]
require('Module:No globals');
local getArgs = require ('Module:Arguments').getArgs;
local this_page = mw.title.getCurrentTitle();
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
]]
local categories = {}; -- category names from ./data
local err_warn_msgs = {}; -- error and warning messages from ./data
local excepted_pages = {};
local prefixes = {}; -- service provider tail string prefixes from ./data
local services = {}; -- archive service provider data from ./data
local uncategorized_namespaces = {}; -- list of namespaces that we should not categorize
local uncategorized_subpages = {}; -- list of subpages that should not be categorized
local ulx = {}; -- Associative array to hold template data
local track = {}; -- Associative array to hold tracking categories
--[[--------------------------< G L O B A L C O N F I G U R A T I O N S E T T I N G S >--------------------
]]
local maxurls = 10; -- highest N examined for numbered |urlN= parameters; parseExtraArgs() scans |url2= .. |url<maxurls>=
local tname = 'Webarchive' -- name of the calling template; used in the 'Unknown problem' error message. Change if the template is renamed.
local verifydates = 'yes' -- 'yes' enables comparison of |date= against the date encoded in the archive url; set to 'no' to disable. See documentation.
--[[--------------------------< inlineError >-----------------------

Critical error.  Builds the complete, red, error message that callers return in place of the normal
rendering, and records the error tracking category in track.

	arg: name of the template parameter at fault; rendered as |<arg>=
	msg: explanatory text appended after the 'Check |<arg>= value.' preamble

Returns the message wrapped in an error-classed <span>.

]]

local function inlineError(arg, msg)
	local span_open = '<span style="font-size:100%" class="error citation-comment">Error in webarchive template: Check <code style="color:inherit; border:inherit; padding:inherit;">|';
	local span_close = '</span>';

	track[categories.error] = 1;											-- side effect: page goes into the error category

	return span_open .. arg .. '=</code> value. ' .. msg .. span_close;
end
--[[--------------------------< inlineRed >-----------------------

Render a text fragment in red, such as a warning that is embedded in the final output.

	msg: the text to render
	trackmsg: 'warning' or 'error' selects the tracking category recorded in track; any other
		value (including nil) records no category

Returns msg wrapped in an error-classed <span>.

]]

local function inlineRed(msg, trackmsg)
	if 'warning' == trackmsg then
		track[categories.warning] = 1;
	elseif 'error' == trackmsg then
		track[categories.error] = 1;
	end

	local span_open = '<span style="font-size:100%" class="error citation-comment">';
	return span_open .. msg .. '</span>';
end
--[[--------------------------< base62 >-----------------------

Convert a base-62 string to a base-10 number.  Digit values are: '0'-'9' => 0-9, 'A'-'Z' => 10-35,
'a'-'z' => 36-61.

Returns 1 when value is empty or contains anything other than alphanumeric characters.

Credit: https://de.wikipedia.org/wiki/Modul:Expr

]]

local function base62( value )
	if not value:match( "^%w+$" ) then										-- value must only be in the set [0-9a-zA-Z]
		return 1;															-- malformed input: default return value
	end

	local total = 0;
	local weight = 1;														-- 62^0 for the least significant digit

	for pos = #value, 1, -1 do												-- from least significant to most significant digit
		local code = value:byte( pos, pos );
		local digit;

		if code >= 48 and code <= 57 then									-- '0'-'9'
			digit = code - 48;
		elseif code >= 65 and code <= 90 then								-- 'A'-'Z'
			digit = code - 55;
		else																-- 'a'-'z'
			digit = code - 61;
		end

		total = total + digit * weight;										-- accumulate this digit's contribution
		weight = weight * 62;												-- weight of the next more significant digit
	end

	return total;
end
--[[--------------------------< tableLength >-----------------------

Count the entries of table T.  Every key is counted, not only the sequence part, so this also works
for associative tables for which the # operator is not meaningful.

]]

local function tableLength(T)
	local entries = 0;

	for _ in pairs (T) do
		entries = entries + 1;
	end

	return entries;
end
--[[--------------------------< dateFormat >-----------------------

Given a date string, return its format: 'iso', 'dmy', 'mdy', or 'ymd'.

Patterns are not anchored; the first pattern, in the fixed order iso, dmy, mdy, ymd, that matches
anywhere in date decides the format.

Returns:
	the format name when a pattern matches and the captured year is between 1900 and 2200 (exclusive)
	false when a pattern matches but the year is outside that range
	nil when date is nil, when no pattern matches, or when the captured year cannot be converted to a number

]]

local function dateFormat(date)
	if not date then
		return nil;															-- nothing to inspect
	end

	local patterns = {														-- a sequence so that the test order is deterministic
		{'iso', '(%d%d%d%d)%-%d%d%-%d%d'},
		{'dmy', '%d%d? +%a+ +(%d%d%d%d)'},
		{'mdy', '%a+ %d%d?, +(%d%d%d%d)'},
		{'ymd', '(%d%d%d%d) +%a+ %d%d?'},									-- TODO: not mos compliant; delete?
	};

	local form, y;

	for _, candidate in ipairs (patterns) do								-- loop through the patterns in order
		y = mw.ustring.match (date, candidate[2]);							-- looking for a match
		if y then															-- not nil when found
			form = candidate[1];											-- save that
			break;															-- and done
		end
	end

	local year = tonumber (y);												-- nil when nothing matched, or when the matched digits are ones tonumber() does not understand (presumably non-ASCII digits matched by the ustring %d class)

	return (year and (1900 < year and 2200 > year)) and form;				-- TODO: why 1900? shouldn't that be birth-of-internet year? why 2200? shouldn't that be current year?
end
--[[--------------------------< makeDate >-----------------------

Given a zero-padded 4-digit year, 2-digit month and 2-digit day, return a full date in df format.

	df: one of 'mdy', 'dmy', 'iso', 'ymd'

On entry, year, month, day are presumed to be correct for the date that they represent; all are required.

Returns the formatted date; returns nil when year, month, or day is nil or an empty string, or when
df is not one of the supported format names.

]]

local function makeDate(year, month, day, df)
	local format = {
		['dmy'] = 'j F Y',
		['mdy'] = 'F j, Y',
		['ymd'] = 'Y F j',
		['iso'] = 'Y-m-d',
	};

	local function is_missing (part)										-- true when a date part is nil or the empty string
		return not part or '' == part;
	end

	if is_missing (year) or is_missing (month) or is_missing (day) or not format[df] then
		return nil;															-- incomplete date or unknown format; never call formatDate() with a nil format
	end

	local date = table.concat ({year, month, day}, '-');					-- assemble iso format date
	return mw.getContentLanguage():formatDate (format[df], date);
end
--[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------

Returns true if date is after 31 December 1899 (why is 1900 the min year? shouldn't the internet's date-of-birth
be min year?), not after today's date, and represents a valid date (29 February 2017 is not a valid date).  Applies
Gregorian leapyear rules.

All arguments are required; each may be a number or a string holding a whole number.  Returns false (never raises)
when an argument is nil, empty, not numeric, or not a whole number.

]]

local function is_valid_date (year, month, day)
	local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

	if not year or '' == year or not month or '' == month or not day or '' == day then
		return false;														-- something missing
	end

	local y = tonumber (year);
	local m = tonumber (month);
	local d = tonumber (day);

	if not (y and m and d) then
		return false;														-- a part is not numeric; comparisons below would raise on nil
	end
	if 0 ~= (y % 1) or 0 ~= (m % 1) or 0 ~= (d % 1) then
		return false;														-- a part is not a whole number; m must be usable as an index into days_in_month
	end

	local today = os.date ('*t');											-- fetch a table of current date parts

	if 1900 > y or today.year < y or 1 > m or 12 < m then					-- year and month must be within bounds TODO: 1900?
		return false;
	end

	local month_length = days_in_month[m];
	if 2 == m and (0 == (y%4) and (0 ~= (y%100) or 0 == (y%400))) then		-- February in a leap year?
		month_length = 29;
	end

	if 1 > d or month_length < d then										-- day must be within bounds
		return false;
	end
																			-- here when date parts represent a valid date
	return os.time ({['year']=y, ['month']=m, ['day']=d, ['hour']=0}) <= os.time();	-- date at midnight must be less than or equal to current date/time
end
--[[--------------------------< decodeWebciteDate >-----------------------

Given a URI-path to Webcite (eg. /67xHmVFWP) return the encoded date in df format.

Returns:
	'query' when the first path segment is not a base-62 snapshot id (see the url forms listed in the function)
	the date in df format when the id decodes to a valid date
	a red error message (which also sets the error tracking category) when the decoded date is not valid
		or cannot be formatted

]]

local function decodeWebciteDate(path, df)
	local id = mw.text.split (path, "/")[2];								-- first path segment; the text after the leading '/'

	-- valid URL formats that are not base62

	-- http://www.webcitation.org/query?id=1138911916587475
	-- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	-- http://www.webcitation.org/1138911916587475
	-- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	-- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e

	local not_base62 = mw.ustring.find (id, "query", 1, true)
		or mw.ustring.find (id, "cache", 1, true)
		or mw.ustring.find (id, "getfile", 1, true)
		or tonumber (id);													-- an all-digit id is not treated as base62

	if not_base62 then
		return "query"
	end

	local decoded = string.format ("%d", base62 (id));						-- base62 string -> number -> decimal text
	local seconds = decoded:sub (1, 10);									-- keep the first 10 characters; presumably the seconds part of the timestamp
	local parts = os.date ('*t', seconds);									-- a table of date parts

	if not is_valid_date (parts.year, parts.month, parts.day) then
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	return makeDate (parts.year, parts.month, parts.day, df) or inlineRed (err_warn_msgs.date4, 'error');
end
--[[--------------------------< decodeWaybackDate >-----------------------

Given a URI-path to Wayback (eg. /web/20160901010101/http://example.com )
or Library of Congress Web Archives (/all/20160901010101/http://example.com)
return the formatted date eg. "September 1, 2016" in df format.

Handles non-digits in snapshot ID such as "re_" and "-" and "*".

Returns:
	'index' when the timestamp segment is '*'
	the date in df format when the timestamp holds a valid date
	a red error message (which also sets the error tracking category) when the timestamp is missing,
		is not numeric, has fewer than 8 characters, or does not represent a valid date

]]

local function decodeWaybackDate(path, df)
	local msg, snapdate;

	snapdate = path:gsub ('^/all/', ''):gsub ('^/web/', ''):gsub ('^/', '');	-- remove leading '/all/', leading '/web/' or leading '/'
	snapdate = snapdate:match ('^[^/]+') or '';								-- get timestamp; empty string when the path has no timestamp segment so that the non-numeric test below reports it instead of raising on nil

	if snapdate == "*" then													-- eg. /web/*/http.. or /all/*/http..
		return "index"
	end

	snapdate = snapdate:gsub ('%a%a_%d?$', ''):gsub ('%-', '');				-- from date, remove any trailing "re_", dashes

	msg = '';																-- not returned yet; see the disabled code at the end of this function
	if snapdate:match ('%*$') then											-- a trailing '*' causes calendar display at archive .org
		snapdate = snapdate:gsub ('%*$', '');								-- remove so not part of length calc later
--		msg = inlineRed(err_warn_msgs.ts_cal, 'warning');					-- TODO: enable this -- make a message
	end

	if not tonumber(snapdate) then											-- also true for the empty string
		return inlineRed (err_warn_msgs.date2, 'error');
	end

	local dlen = string.len(snapdate)
	if dlen < 8 then														-- we need 8 digits TODO: but shouldn't this be testing for 14 digits?
		return inlineRed (err_warn_msgs.date3, 'error');
	end

	local year, month, day = snapdate:match ('(%d%d%d%d)(%d%d)(%d%d)');		-- no need for snapdatelong here

	if not is_valid_date (year, month, day) then
		return inlineRed(err_warn_msgs.date_err, 'error');
	end

	return makeDate(year, month, day, df) or inlineRed (err_warn_msgs.date7, 'error');

--[[	snapdate = makeDate(year, month, day, df);							-- TODO: enable this
	if snapdate then
		if 14 == dlen then
			return snapdate, msg;											-- return date with message if any
		else
			return snapdate, msg .. inlineRed(err_warn_msgs.ts_len, 'warning');	-- return date with warning message(s)
		end
	else
		return inlineRed (err_warn_msgs.date7, 'error');					-- return error message
	end
]]
end
--[[--------------------------< decodeArchiveisDate >-----------------------

Given an Archive.is "long link" URI-path (e.g. /2016.08.28-144552/http://example.com)
return the date in df format (e.g. if df = dmy, return 28 August 2016).

Handles "." and "-" in snapshot date, so 2016.08.28-144552 is same as 20160828144552.

Returns:
	'short link' when path is '/' followed only by alphanumeric characters (e.g. http://archive.is/hD1qz)
	the date in df format when the snapshot date is valid; a second return value holds a red warning
		message when the cleaned snapshot date is not 14 characters long
	a red error message (which also sets the error tracking category) when the snapshot date is missing,
		has fewer than 8 characters, is not a valid date, or cannot be formatted

]]

local function decodeArchiveisDate(path, df)
	if path:match ('^/%w+$') then											-- short form url path is '/' followed by some number of base 62 digits and nothing else
		return "short link"													-- e.g. http://archive.is/hD1qz
	end

	local segment = path:match ('^[^/]*/([^/]*)') or '';					-- text between the first '/' and the next '/' (or end of path); empty string when path has no '/' so that the length test below reports it instead of raising on nil
	local snapdate = segment:gsub ('[%.%-]', '');							-- snapshot date, e.g. 2016.08.28-144552, with periods and hyphens removed

	local dlen = string.len(snapdate)
	if dlen < 8 then														-- we need 8 digits TODO: but shouldn't this be testing for 14 digits?
		return inlineRed (err_warn_msgs.date3, 'error');
	end

	local year, month, day = snapdate:match ('(%d%d%d%d)(%d%d)(%d%d)');		-- no need for snapdatelong here

	if not is_valid_date (year, month, day) then
		return inlineRed(err_warn_msgs.date_err, 'error');
	end

	snapdate = makeDate(year, month, day, df);
	if not snapdate then
		return inlineRed (err_warn_msgs.date7, 'error');					-- return error message
	end

	if 14 == dlen then
		return snapdate;													-- return date
	end
	return snapdate, inlineRed(err_warn_msgs.ts_len, 'warning');			-- return date with warning message
end
--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Makes a wikilink or its plain-text equivalent.

When no_link is nil:
	link and display both set	-> [[L|D]]
	only link set				-> [[L]]
	link missing or empty		-> display text, or empty string when display is missing too

When no_link is anything other than nil (including false and the empty string) no markup is produced:
	display set					-> display text
	otherwise					-> link, or empty string when link is missing

]=]

local function make_wikilink (link, display, no_link)
	local have_display = display and ('' ~= display);

	if nil ~= no_link then														-- caller asked for plain text
		if have_display then
			return display;
		end
		return link or '';
	end

	if not (link and ('' ~= link)) then											-- nothing to link to
		return display or '';
	end

	if have_display then
		return '[[' .. link .. '|' .. display .. ']]';
	end
	return '[[' .. link .. ']]';
end
--[[--------------------------< serviceName >-----------------------

Given a domain extracted by mw.uri.new() (eg. web.archive.org), look the host up in services[] and set
	ulx.url1.service	- service ID ('other' when the table entry has none or the host is unknown)
	ulx.url1.tail		- tail string, with a leading space
	track[]				- tracking category for the service (categories.unknown for an unknown host)

no_link is handed to make_wikilink() to control whether the service name is wikilinked.

]]

local function serviceName(host, no_link)
	local category;
	local key;

	host = host:lower ();														-- lowercase
	host = host:gsub ('^web%.(.+)', '%1');										-- remove web. subdomain
	host = host:gsub ('^www%.(.+)', '%1');										-- remove www. subdomain

	if services[host] then														-- exact match
		key = host;
	else																		-- otherwise look for a service name inside host
		for name in pairs (services) do
			local escaped = name:gsub ('([%.%-])', '%%%1');						-- escape '.' and '-' so they match literally
			if host:find ('%f[%a]' .. escaped) then								-- name must begin at a letter boundary
				key = name;
				break;
			end
		end
	end

	if not key then																-- here when unknown archive
		ulx.url1.service = 'other';
		category = categories.unknown;
		ulx.url1.tail = table.concat ({'', prefixes.at, host, inlineRed (err_warn_msgs.unknown_url)}, ' ');	-- TODO: call to inlineRed() does not specify 'error' or 'warning'; should it?
	else
		local entry = services[key];
		local parts = {''};														-- empty string in [1] so that concatenated result has leading single space

		ulx.url1.service = entry[4] or 'other';
		category = entry[5] or categories.other;

		local prefix = entry[1];												-- false: 'at' prefix; true: 'at the' prefix; anything else is used as is
		if false == prefix then
			prefix = prefixes.at;
		elseif true == prefix then
			prefix = prefixes.atthe;
		end
		table.insert (parts, prefix);

		table.insert (parts, make_wikilink (entry[2], entry[3], no_link));		-- add article wikilink

		if entry[6] then														-- add tail postfix if it exists
			table.insert (parts, entry[6]);
		end

		ulx.url1.tail = table.concat (parts, ' ');								-- put it all together; result has leading space character
	end

	track[category] = 1
end
--[[--------------------------< parseExtraArgs >-----------------------

Parse numbered arguments starting at 2, such as url2..url10, date2..date10, title2..title10

	For example: {{webarchive |url=.. |url4=.. |url7=..}}

	Three url arguments not in numeric sequence (1..4..7).
	Function only processes arguments numbered 2 or greater (in this case 4 and 7)
	It creates numeric sequenced table entries like:
		ulx.url2.url = <argument value for url4>
		ulx.url3.url = <argument value for url7>
	The date and title stored with each entry are those carrying the same number as the url argument
	(|date4= and |title4= for |url4=); a missing date is replaced by a warning message.

Returns the number of URL arguments found numbered 2 or greater (in this case returns "2")

]]

local function parseExtraArgs(args)
	local found = 0;															-- number of |urlN= arguments (N >= 2) seen so far

	for i = 2, maxurls do
		local url = args['url' .. i];
		if url then
			found = found + 1;
			-- entries are stored in sequence (url2, url3, ...) but |dateN= and |titleN= must be read
			-- with the number the editor used for |urlN=, not with the sequence number
			ulx['url' .. (found + 1)] = {
				url = url,
				date = args['date' .. i] or inlineRed (err_warn_msgs.date_miss, 'warning'),
				title = args['title' .. i],										-- nil when not supplied
			};
		end
	end

	return found;
end
--[[--------------------------< comma >-----------------------

Given a date string, return "," when it contains a <month name> <day>, <year> (MDY) date;
return the empty string otherwise or when no date is given.

]]

local function comma(date)
	if not date then
		return '';
	end

	local separator = date:match ('%a+ +%d%d?(,) +%d%d%d%d');					-- captures the comma that follows the day
	return separator or '';
end
--[[--------------------------< createTracking >-----------------------

Return the tracking categories held in track[] as a string of category wikilinks.

Returns an empty string when track[] is empty or when the current page is in a namespace listed in
uncategorized_namespaces or its title matches a pattern in uncategorized_subpages; pages listed in
excepted_pages are exempt from both of those exclusions.

]]

local function createTracking()
	local function page_is_excluded ()
		if excepted_pages[this_page.fullText] then								-- this page is explicitly allowed to be categorized
			return false;
		end
		if uncategorized_namespaces[this_page.nsText] then						-- whole namespace is not categorized
			return true;
		end
		for _, pattern in ipairs (uncategorized_subpages) do					-- cycle through page name patterns
			if this_page.text:match (pattern) then								-- test page name against each pattern
				return true;
			end
		end
		return false;
	end

	if page_is_excluded () then
		return '';																-- this page not to be categorized so return empty string
	end

	local links = {};
	if tableLength (track) > 0 then
		for category in pairs (track) do										-- loop through table
			links[#links + 1] = make_wikilink (category);						-- and convert category names to links
		end
	end

	return table.concat (links);												-- concat into one big string; empty string if table is empty
end
--[[--------------------------< createRendering >-----------------------

Return a rendering of the data in ulx[][]

ulx.url1.format selects the layout:
	'none'			- single archive link, optionally followed by 'Additional archives:' links
	'addlarchives'	- 'Additional archives:' list of every url
	anything else	- 'Additional pages archived on <date>:' list of every url

TODO1: when archive date is '*' ('index') leading archive extlink should be [<url> Archive index] instead of
[<url> Archived] index; see the TODO1 notes in the code

]]

local function createRendering()
	local first = ulx.url1;
	local out = {};

	local function emit (text)													-- append one piece of output
		table.insert (out, text);
	end

	if 'none' ~= first.format then												-- For {{cite archives}}
		if 'addlarchives' == first.format then									-- Multiple archive services
			emit ('Additional archives: ');
		else																	-- Multiple pages from the same archive
			emit ('Additional pages archived on ');
			emit (first.date);
			emit (': ');
		end

		local last = first.extraurls + 1;
		for n = 1, last do														-- every url, including the first
			local entry = ulx['url' .. n];
			local label = entry['title'];

			emit ('[');															-- open extlink markup
			emit (entry['url']);
			emit (' ');															-- the required space
			if not label then													-- no title so choose a fallback label
				if 'addlarchives' == first.format then
					label = entry['date'];
				else															-- must be addlpages
					label = 'Page ' .. n;
				end
			end
			emit (label);
			emit (']');															-- close extlink markup
			emit (n == last and '.' or ', ');									-- add terminator
		end

		return table.concat (out);												-- make a big string and done
	end

	-- For {{wayback}}, {{webcite}}
	local period1 = '';															-- For backwards compat with {{wayback}}
	local period2 = '.';

	emit ('[');																	-- open extlink markup
	emit (first.url);

	if first.title then
		emit (' ');																-- the required space
		emit (first.title);
		emit (']');																-- close extlink markup
		emit (first.tail);
		if first.date then
			emit (' (');														-- open date text
			emit ('index' == first.date and 'archive' or 'archived');
			emit (' ');
			emit (first.date);
			emit (')');															-- close date text
		end
	else																		-- no title
		-- TODO1: for a correct label when date is 'index' emit
		-- ('index' == first.date and ' Archive index]' or ' Archived]') here instead
		emit (' Archived]');													-- link label and close extlink markup
		if not first.date then
			emit (first.tail);
		else
			if 'wayback' == first.service then
				period1 = '.';
				period2 = '';
			end
			-- TODO1: for a correct label when date is 'index' emit the date only when it is not 'index'
			emit (' ' .. first.date);
			emit (comma (first.date));											-- add ',' if date format is mdy
			emit (first.tail);
			emit (period1);														-- terminate
		end
	end

	if 0 < first.extraurls then													-- For multiple archive URLs
		local last = first.extraurls + 1;

		emit (period2);															-- terminate first url
		emit (' Additional archives: ');
		for n = 2, last do														-- loop through the additionals
			local entry = ulx['url' .. n];
			local field = entry['title'] and 'title' or 'date';					-- choose display text

			emit ('[');															-- open extlink markup
			emit (entry['url']);
			emit (' ');															-- the required space
			emit (entry[field]);
			emit (']');															-- close extlink markup
			emit (n == last and '.' or ', ');									-- add terminator
		end
	end

	return table.concat (out);													-- make a big string and done
end
--[[--------------------------< W E B A R C H I V E >----------------------------------------------------------
template entry point

frame: the frame object; template parameters are read from it with getArgs()

Reads |url=/|url1=, |date=/|date1=, |title=/|title1=, |format=, |nolink= and the numbered extra
|urlN= parameters, fills ulx.url1, and returns the rendering from createRendering() followed by the
tracking-category wikilinks from createTracking(). For conflicting or missing parameters it returns an
inlineError() message followed by the tracking categories instead.

TODO: deprecate empty |nolink= as a 'positive' assertion that archive service is not to be linked
]]
local function webarchive(frame)
-- fetch the template arguments; the custom valueFunc keeps |nolink= untouched and turns every other
-- empty or whitespace-only value into nil
local args = getArgs (frame, { -- TODO: delete this assignment
valueFunc = function (key, value) -- this code so that we can detect and handle the oddity that is |nolink=
if 'nolink' == key then -- |nolink= is 'set' when present with or without assigned value; TODO: deprecate this peculiar use
return value; -- don't trim; we don't care (right now) what the value is except when nil and we can't trim nil
elseif value then -- all other values: if the value is not nil
value = mw.text.trim (value); -- trim whitespace
if '' ~= value then -- empty string when value was only whitespace or was empty
return value; -- return non-nil, non-empty values
end
end
return nil; -- value was nil, empty, or contained only whitespace
end -- end of valueFunc
});
-- local args = getArgs (frame); -- TODO: replace the above with this
-- load the configuration data; the /sandbox data module is used when this module's own title contains 'sandbox'
local data = mw.loadData (table.concat ({ -- make a data module name; sandbox or live
'Module:Webarchive/data',
frame:getTitle():find('sandbox', 1, true) and '/sandbox' or '' -- this instance is ./sandbox then append /sandbox
}));
categories = data.categories; -- fill in the forward declarations
err_warn_msgs = data.err_warn_msgs;
excepted_pages = data.excepted_pages;
prefixes = data.prefixes;
services = data.services;
uncategorized_namespaces = data.uncategorized_namespaces;
uncategorized_subpages = data.uncategorized_subpages;
local date, format, msg, uri, url;
-- NOTE(review): verifydates is not declared in this function; presumably a module-level configuration
-- setting holding 'yes' or something else - confirm. It is overwritten in place here, so a second call
-- in the same Lua state would compare 'yes' against a boolean and always get false.
verifydates = 'yes' == verifydates; -- convert to boolean
-- |url= and |url1= are aliases; exactly one of them must be present
if args.url and args.url1 then -- URL argument (first)
return inlineError("url", "Conflicting |url= and |url1=.") .. createTracking();
end
url = args.url or args.url1;
if not url then
return inlineError("url", "Empty.") .. createTracking()
end
-- two known malformed-url shapes are reported as errors and tracked
if mw.ustring.find( url, "https://web.http", 1, true ) then -- track bug - TODO: IAbot bug; not known if the bug has been fixed; deferred
track[categories.error] = 1;
return inlineError("url", "https://web.http") .. createTracking()
end
if url == "https://web.archive.org/http:/" then -- track bug - TODO: IAbot bug; not known if the bug has been fixed; deferred
track[categories.error] = 1;
return inlineError("url", "Invalid URL") .. createTracking()
end
ulx.url1 = {}
ulx.url1.url = url
-- a url that begins with neither 'http' (any case) nor '//' gets 'http://' prepended
if not (url:lower():find ('^http') or url:find ('^//')) then -- TODO: is this a good idea? isn't it better to simply throw an error when url is malformed ...
ulx.url1.url = 'http://' .. url -- ... rather than apply this 'fix' that might not fix anything?
end
ulx.url1.extraurls = parseExtraArgs(args)
uri = mw.uri.new (ulx.url1.url); -- get a table of uri parts from this url
-- serviceName() sets ulx.url1.service, ulx.url1.tail and the service's tracking category
serviceName(uri.host, args.nolink)
-- |date= and |date1= are aliases; at most one of them may be present
if args.date and args.date1 then -- Date argument
return inlineError("date", "Conflicting |date= and |date1=.") .. createTracking();
end
date = args.date or args.date1
-- date handling depends on the archive service: when |date= is given and verifydates is true the date
-- is compared with the date decoded from the url; when |date= is absent the date is decoded from the url
if 'wayback' == ulx.url1.service or 'locwebarchives' == ulx.url1.service then
if '*' == date then -- TODO: why is this not compared to url date?
date = 'index';
end
if date then
if verifydates then
-- dateFormat() is defined outside this chunk; presumably it returns the format name of date, or nil
-- when the format is not recognised - confirm
local ldf = dateFormat(date)
if ldf then
local udate, msg = decodeWaybackDate( uri.path, ldf ) -- get the url date in the same format as date in |date=; 'index' when wayback date is *
if udate ~= date then
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning') .. (msg or ''); -- mismatch us url date; add message if there is one
else
date = date .. (msg or ''); -- add message if there is one
end
end
end
else -- no |date=
date, msg = decodeWaybackDate( uri.path, "iso" )
if not date then
date = inlineRed (err_warn_msgs.date1, 'error'); -- TODO: change this type of message so that it identifies url as source of error?
else
date = date .. (msg or ''); -- add message if there is one
end
end
elseif 'webcite' == ulx.url1.service then
if date then
if verifydates then
local ldf = dateFormat(date)
if ldf then
local udate = decodeWebciteDate( uri.path, ldf ) -- get the url date in the same format as date in |date=
if 'query' ~= udate then -- skip if query
if udate ~= date then
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning');
end
end
end
end
else
date = decodeWebciteDate( uri.path, "iso" )
if date == "query" then
date = inlineRed (err_warn_msgs.date_miss, 'warning');
elseif not date then
date = inlineRed (err_warn_msgs.date1, 'error');
end
end
elseif 'archiveis' == ulx.url1.service then
if date then
if verifydates then
local ldf = dateFormat(date)
if ldf then
local udate, msg = decodeArchiveisDate( uri.path, ldf ) -- get the url date in the same format as date in |date=
if 'short link' ~= udate then -- skip if short link
if udate ~= date then
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning') .. (msg or ''); -- mismatch: use url date; add message if there is one
else
date = date .. (msg or ''); -- add message if there is one
end
end
end
end
else -- no |date=
date, msg = decodeArchiveisDate( uri.path, "iso" )
if date == "short link" then
date = inlineRed (err_warn_msgs.date_miss, 'warning');
elseif not date then
date = inlineRed (err_warn_msgs.date1, 'error');
else
date = date .. (msg or ''); -- add message if there is one
end
end
else -- some other service
if not date then
date = inlineRed (err_warn_msgs.date_miss, 'warning');
end
end
ulx.url1.date = date
-- |format= accepts 'addlpages' (only when a date is available) and 'addlarchives'; anything else,
-- including a missing |format=, becomes 'none'
format = args.format; -- Format argument
if not format then
format = "none"
else
if format == "addlpages" then
if not ulx.url1.date then
format = "none"
end
elseif format == "addlarchives" then
format = "addlarchives"
else
format = "none"
end
end
ulx.url1.format = format
-- |title= and |title1= are aliases; at most one of them may be present
if args.title and args.title1 then -- Title argument
return inlineError("title", "Conflicting |title= and |title1=.") .. createTracking();
end
ulx.url1.title = args.title or args.title1;
local rend = createRendering()
-- NOTE(review): createRendering() as written always returns the result of table.concat(), so this
-- fallback looks unreachable; tname is not declared in this function - presumably a module-level
-- setting holding the template name; confirm before relying on this branch
if not rend then
rend = '<span style="font-size:100%" class="error citation-comment">Error in [[:Template:' .. tname .. ']]: Unknown problem. Please report on template talk page.</span>'
track[categories.error] = 1;
end
return rend .. createTracking()
end
--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

The module's public interface: webarchive() is the only exported function.

]]

local exports = {
	webarchive = webarchive,
};

return exports;