모듈:String
이 모듈에 대한 설명문서는 모듈:String/설명문서에서 만들 수 있습니다
1 --[[
2
3 This module is intended to provide access to basic string functions.
4
5 Most of the functions provided here can be invoked with named parameters,
6 unnamed parameters, or a mixture. If named parameters are used, Mediawiki will
7 automatically remove any leading or trailing whitespace from the parameter.
8 Depending on the intended use, it may be advantageous to either preserve or
9 remove such whitespace.
10
11 Global options
12 ignore_errors: If set to 'true' or 1, any error condition will result in
13 an empty string being returned rather than an error message.
14
15 error_category: If an error occurs, specifies the name of a category to
16 include with the error message. The default category is
17 [Category:Errors reported by Module String].
18
19 no_category: If set to 'true' or 1, no category will be added if an error
20 is generated.
21
22 Unit tests for this module are available at Module:String/tests.
23 ]]
24
-- Export table: every function defined below is attached to it, and it is
-- returned at the end of the module.
local str = {}
26
27 --[[
28 len
29
30 This function returns the length of the target string.
31
32 Usage:
33 {{#invoke:String|len|target_string|}}
34 OR
35 {{#invoke:String|len|s=target_string}}
36
37 Parameters
38 s: The string whose length to report
39
40 If invoked using named parameters, Mediawiki will automatically remove any leading or
41 trailing whitespace from the target string.
42 ]]
-- Entry point for {{#invoke:String|len}}.
-- Resolves the 's' parameter (named or first positional) and returns its
-- length as computed by mw.ustring.len. A missing string counts as ''.
function str.len( frame )
    local params = str._getParameters( frame.args, { 's' } )
    local target = params.s
    if not target then
        target = ''
    end
    return mw.ustring.len( target )
end
48
49 --[[
50 sub
51
52 This function returns a substring of the target string at specified indices.
53
54 Usage:
55 {{#invoke:String|sub|target_string|start_index|end_index}}
56 OR
57 {{#invoke:String|sub|s=target_string|i=start_index|j=end_index}}
58
59 Parameters
60 s: The string to return a subset of
61 i: The first index of the substring to return, defaults to 1.
62 j: The last index of the string to return, defaults to the last character.
63
64 The first character of the string is assigned an index of 1. If either i or j
65 is a negative value, it is interpreted the same as selecting a character by
66 counting from the end of the string. Hence, a value of -1 is the same as
67 selecting the last character of the string.
68
69 If the requested indices are out of range for the given string, an error is
70 reported.
71 ]]
-- Entry point for {{#invoke:String|sub}}.
-- Parameters (named or positional, in this order): s, i, j.
--   s: source string, '' when missing
--   i: first index, 1 when missing or not numeric
--   j: last index, -1 (last character) when missing or not numeric
-- Returns the substring s[i..j], or the result of str._error when an index
-- falls outside the string or the indices are reversed.
function str.sub( frame )
    local params = str._getParameters( frame.args, { 's', 'i', 'j' } )
    local text = params.s or ''
    local first = tonumber( params.i ) or 1
    local last = tonumber( params.j ) or -1
    local length = mw.ustring.len( text )

    -- Turn negative (count-from-the-end) indices into positive ones so the
    -- range checks below can treat both forms alike.
    if first < 0 then
        first = length + first + 1
    end
    if last < 0 then
        last = length + last + 1
    end

    local out_of_range = first > length or last > length or first < 1 or last < 1
    if out_of_range then
        return str._error( 'String subset index out of range' )
    end

    if last < first then
        return str._error( 'String subset indices out of order' )
    end

    return mw.ustring.sub( text, first, last )
end
97
98 --[[
99 This function implements the features of {{str sub old}} and is kept in order
100 to maintain these older templates.
101 ]]
-- Legacy entry point backing {{str sub old}}.
-- Reads named arguments only:
--   s:   source string; a missing value is treated as '' (like the other
--        entry points in this module) instead of raising a script error
--        inside mw.ustring.sub
--   i:   zero-based offset of the first character to return, default 0
--   len: number of characters to return; when missing or not numeric the
--        substring runs to the end of the string
-- Returns the selected substring.
function str.sublength( frame )
    local args = frame.args
    local s = args.s or ''
    local i = tonumber( args.i ) or 0
    local len = tonumber( args.len )
    -- `len and ( i + len )` yields nil when no length was given, which lets
    -- mw.ustring.sub fall back to its default end position.
    return mw.ustring.sub( s, i + 1, len and ( i + len ) )
end
107
108 --[[
109 _match
110
111 This function returns a substring from the source string that matches a
112 specified pattern. It is exported for use in other modules.
113
114 Usage:
115 strmatch = require("Module:String")._match
116 sresult = strmatch( s, pattern, start, match, plain, nomatch )
117
118 Parameters
119 s: The string to search
120 pattern: The pattern or string to find within the string
121 start: The index within the source string to start the search. The first
122 character of the string has index 1. Defaults to 1.
123 match: In some cases it may be possible to make multiple matches on a single
124 string. This specifies which match to return, where the first match is
125 match= 1. If a negative number is specified then a match is returned
126 counting from the last match. Hence match = -1 is the same as requesting
127 the last match. Defaults to 1.
128 plain: A flag indicating that the pattern should be understood as plain
129 text. Defaults to false.
130 nomatch: If no match is found, output the "nomatch" value rather than an error.
131
132 For information on constructing Lua patterns, a form of [regular expression], see:
133
134 * http://www.lua.org/manual/5.1/manual.html#5.4.1
135 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
136 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
137
138 ]]
-- Core matcher behind str.match; exported so other modules can call it.
--
-- Parameters:
--   s:           string to search
--   pattern:     pattern (or plain text when plain_flag is set) to look for
--   start:       index at which the search begins; defaults to 1. A negative
--                value counts from the end of the string.
--   match_index: which match to return; 1 is the first match, negative values
--                count back from the last match. Defaults to 1 when nil or
--                not numeric.
--   plain_flag:  when truthy, pattern is escaped and matched literally
--   nomatch:     value returned instead of an error when nothing matches
--
-- Returns the selected match (only its first capture when the pattern has
-- captures), `nomatch` when it is non-nil and nothing matched, or otherwise
-- the result of str._error.
function str._match( s, pattern, start, match_index, plain_flag, nomatch )
    if s == '' then
        return str._error( 'Target string is empty' )
    end
    if pattern == '' then
        return str._error( 'Pattern string is empty' )
    end

    start = tonumber( start ) or 1
    if math.abs( start ) < 1 or math.abs( start ) > mw.ustring.len( s ) then
        return str._error( 'Requested start is out of range' )
    end

    -- Apply the documented default. Without this, a caller from another
    -- module that omits match_index would reach `nil > 0` below and fail
    -- with a comparison error.
    match_index = math.floor( tonumber( match_index ) or 1 )
    if match_index == 0 then
        return str._error( 'Match index is out of range' )
    end

    if plain_flag then
        pattern = str._escapePattern( pattern )
    end

    local result
    if match_index == 1 then
        -- First match: mw.ustring.match accepts the start index directly.
        result = mw.ustring.match( s, pattern, start )
    else
        -- gmatch takes no start index, so cut the string down first. Using
        -- `~= 1` rather than `> 1` makes a negative start (already accepted
        -- by the range check above) apply here too, instead of being
        -- silently ignored.
        if start ~= 1 then
            s = mw.ustring.sub( s, start )
        end

        local iterator = mw.ustring.gmatch( s, pattern )
        if match_index > 0 then
            -- Forward search: count matches down until the requested one.
            local remaining = match_index
            for w in iterator do
                remaining = remaining - 1
                if remaining == 0 then
                    result = w
                    break
                end
            end
        else
            -- Reverse search: collect every match, then index from the end.
            local matches = {}
            for w in iterator do
                matches[#matches + 1] = w
            end
            result = matches[#matches + 1 + match_index]
        end
    end

    if result ~= nil then
        return result
    end
    if nomatch == nil then
        return str._error( 'Match not found' )
    end
    return nomatch
end
200
201 --[[
202 match
203
204 This function returns a substring from the source string that matches a
205 specified pattern.
206
207 Usage:
208 {{#invoke:String|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}}
209 OR
210 {{#invoke:String|match|s=source_string|pattern=pattern_string|start=start_index
211 |match=match_number|plain=plain_flag|nomatch=nomatch_output}}
212
213 Parameters
214 s: The string to search
215 pattern: The pattern or string to find within the string
216 start: The index within the source string to start the search. The first
217 character of the string has index 1. Defaults to 1.
218 match: In some cases it may be possible to make multiple matches on a single
219 string. This specifies which match to return, where the first match is
220 match= 1. If a negative number is specified then a match is returned
221 counting from the last match. Hence match = -1 is the same as requesting
222 the last match. Defaults to 1.
223 plain: A flag indicating that the pattern should be understood as plain
224 text. Defaults to false.
225 nomatch: If no match is found, output the "nomatch" value rather than an error.
226
227 If invoked using named parameters, Mediawiki will automatically remove any leading or
228 trailing whitespace from each string. In some circumstances this is desirable, in
229 other cases one may want to preserve the whitespace.
230
231 If the match_number or start_index are out of range for the string being queried, then
232 this function generates an error. An error is also generated if no match is found.
233 If one adds the parameter ignore_errors=true, then the error will be suppressed and
234 an empty string will be returned on any failure.
235
236 For information on constructing Lua patterns, a form of [regular expression], see:
237
238 * http://www.lua.org/manual/5.1/manual.html#5.4.1
239 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
240 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
241
242 ]]
-- Entry point for {{#invoke:String|match}}.
-- Resolves s, pattern, start, match, plain and nomatch (named or positional,
-- in that order), applies the defaults and hands the work to str._match.
function str.match( frame )
    local params = str._getParameters(
        frame.args,
        { 's', 'pattern', 'start', 'match', 'plain', 'nomatch' }
    )

    local source = params.s or ''
    local pattern = params.pattern or ''
    local start_index = tonumber( params.start ) or 1
    -- A missing 'plain' argument means "treat the pattern as a Lua pattern".
    local is_plain = str._getBoolean( params.plain or false )
    -- Fractional match numbers are rounded down.
    local which = math.floor( tonumber( params.match ) or 1 )

    return str._match( source, pattern, start_index, which, is_plain, params.nomatch )
end
255
256 --[[
257 pos
258
259 This function returns a single character from the target string at position pos.
260
261 Usage:
262 {{#invoke:String|pos|target_string|index_value}}
263 OR
264 {{#invoke:String|pos|target=target_string|pos=index_value}}
265
266 Parameters
267 target: The string to search
268 pos: The index for the character to return
269
270 If invoked using named parameters, Mediawiki will automatically remove any leading or
271 trailing whitespace from the target string. In some circumstances this is desirable, in
272 other cases one may want to preserve the whitespace.
273
274 The first character has an index value of 1.
275
276 If one requests a negative value, this function will select a character by counting backwards
277 from the end of the string. In other words pos = -1 is the same as asking for the last character.
278
279 A requested value of zero, or a value greater than the length of the string returns an error.
280 ]]
-- Entry point for {{#invoke:String|pos}}.
-- Returns the single character of 'target' at index 'pos' (named or
-- positional, in that order). Negative indices are passed straight to
-- mw.ustring.sub. An index of zero (also the fallback for a missing or
-- non-numeric 'pos') or one whose magnitude exceeds the string length
-- produces the result of str._error.
function str.pos( frame )
    local params = str._getParameters( frame.args, { 'target', 'pos' } )
    local text = params.target or ''
    local index = tonumber( params.pos ) or 0

    if index == 0 then
        return str._error( 'String index out of range' )
    end
    if math.abs( index ) > mw.ustring.len( text ) then
        return str._error( 'String index out of range' )
    end

    return mw.ustring.sub( text, index, index )
end
292
293 --[[
294 str_find
295
296 This function duplicates the behavior of {{str_find}}, including all of its quirks.
297 This is provided in order to support existing templates, but is NOT RECOMMENDED for
298 new code and templates. New code is recommended to use the "find" function instead.
299
300 Returns the first index in "source" that is a match to "target". Indexing is 1-based,
301 and the function returns -1 if the "target" string is not present in "source".
302
303 Important Note: If the "target" string is empty / missing, this function returns a
304 value of "1", which is generally unexpected behavior, and must be accounted for
305 separately.
306 ]]
-- Legacy entry point reproducing {{str_find}}.
-- Performs a plain-text search for 'target' inside 'source' (named or
-- positional, in that order) starting at index 1.
-- Returns 1 when 'target' is empty or missing, the index of the first
-- occurrence when found, and -1 otherwise.
function str.str_find( frame )
    local params = str._getParameters( frame.args, { 'source', 'target' } )
    local haystack = params.source or ''
    local needle = params.target or ''

    -- Quirk kept for compatibility: an empty needle reports index 1.
    if needle == '' then
        return 1
    end

    local first = mw.ustring.find( haystack, needle, 1, true )
    if first == nil then
        return -1
    end
    return first
end
323
324 --[[
325 find
326
327 This function allows one to search for a target string or pattern within another
328 string.
329
330 Usage:
331 {{#invoke:String|find|source_str|target_string|start_index|plain_flag}}
332 OR
333 {{#invoke:String|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}}
334
335 Parameters
336 source: The string to search
337 target: The string or pattern to find within source
338 start: The index within the source string to start the search, defaults to 1
339 plain: Boolean flag indicating that target should be understood as plain
340 text and not as a Lua style regular expression, defaults to true
341
342 If invoked using named parameters, Mediawiki will automatically remove any leading or
343 trailing whitespace from the parameter. In some circumstances this is desirable, in
344 other cases one may want to preserve the whitespace.
345
346 This function returns the first index >= "start" where "target" can be found
347 within "source". Indices are 1-based. If "target" is not found, then this
348 function returns 0. If either "source" or "target" are missing / empty, this
349 function also returns 0.
350
351 This function should be safe for UTF-8 strings.
352 ]]
-- Entry point for {{#invoke:String|find}}.
-- Parameters (named or positional, in this order):
--   source: string to search
--   target: text or pattern to look for
--   start:  index at which to begin searching, default 1
--   plain:  boolean-like flag; when true the target is matched literally.
--           Defaults to true only when the argument is absent.
-- Returns the index of the first match, or 0 when there is no match or when
-- source or target is empty or missing.
function str.find( frame )
    local new_args = str._getParameters( frame.args, { 'source', 'target', 'start', 'plain' } )
    local source_str = new_args['source'] or ''
    local pattern = new_args['target'] or ''
    local start_pos = tonumber( new_args['start'] ) or 1

    -- Explicit nil check: `plain or true` would turn a boolean false, as a
    -- Lua caller might pass, into true.
    local plain = new_args['plain']
    if plain == nil then
        plain = true
    end

    if source_str == '' or pattern == '' then
        return 0
    end

    plain = str._getBoolean( plain )

    -- Only the first return value (the start index) is wanted.
    local start = mw.ustring.find( source_str, pattern, start_pos, plain )
    if start == nil then
        start = 0
    end

    return start
end
373
374 --[[
375 replace
376
377 This function allows one to replace a target string or pattern within another
378 string.
379
380 Usage:
381 {{#invoke:String|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}}
382 OR
383 {{#invoke:String|replace|source=source_string|pattern=pattern_string|replace=replace_string|
384 count=replacement_count|plain=plain_flag}}
385
386 Parameters
387 source: The string to search
388 pattern: The string or pattern to find within source
389 replace: The replacement text
390 count: The number of occurrences to replace, defaults to all.
391 plain: Boolean flag indicating that pattern should be understood as plain
392 text and not as a Lua style regular expression, defaults to true
393 ]]
-- Entry point for {{#invoke:String|replace}}.
-- Parameters (named or positional, in this order):
--   source:  string to operate on
--   pattern: text or pattern to replace
--   replace: replacement text, default ''
--   count:   maximum number of replacements; all when missing or not numeric
--   plain:   boolean-like flag; when true both pattern and replacement are
--            taken literally. Defaults to true only when the argument is
--            absent.
-- Returns the string after substitution. The source is returned unchanged
-- when source or pattern is empty or missing.
function str.replace( frame )
    local new_args = str._getParameters( frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' } )
    local source_str = new_args['source'] or ''
    local pattern = new_args['pattern'] or ''
    local replace = new_args['replace'] or ''
    local count = tonumber( new_args['count'] )

    -- Explicit nil check: `plain or true` would turn a boolean false, as a
    -- Lua caller might pass, into true.
    local plain = new_args['plain']
    if plain == nil then
        plain = true
    end

    if source_str == '' or pattern == '' then
        return source_str
    end
    plain = str._getBoolean( plain )

    if plain then
        pattern = str._escapePattern( pattern )
        -- In a replacement string only '%' is special, so doubling it is
        -- enough to make the replacement literal.
        replace = string.gsub( replace, "%%", "%%%%" )
    end

    -- gsub also returns the number of substitutions; keep only the string.
    local result
    if count ~= nil then
        result = mw.ustring.gsub( source_str, pattern, replace, count )
    else
        result = mw.ustring.gsub( source_str, pattern, replace )
    end

    return result
end
422
423 --[[
424 simple function to pipe string.rep to templates.
425 ]]
-- Entry point exposing string.rep to templates.
-- Positional arguments: 1 = text to repeat ('' when missing), 2 = repeat
-- count. Returns the repeated text, or the result of str._error when the
-- count cannot be converted to a number.
function str.rep( frame )
    local raw_count = frame.args[2]
    local repetitions = tonumber( raw_count )

    if repetitions then
        return string.rep( frame.args[1] or '', repetitions )
    end

    return str._error( 'function rep expects a number as second parameter, received "' .. ( raw_count or '' ) .. '"' )
end
433
434 --[[
435 escapePattern
436
437 This function escapes special characters from a Lua string pattern. See [1]
438 for details on how patterns work.
439
440 [1] https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
441
442 Usage:
443 {{#invoke:String|escapePattern|pattern_string}}
444
445 Parameters
446 pattern_string: The pattern string to escape.
447 ]]
-- Entry point for {{#invoke:String|escapePattern}}.
-- Escapes the first positional argument with str._escapePattern and returns
-- it, or returns the result of str._error when no argument was supplied.
function str.escapePattern( frame )
    local raw_pattern = frame.args[1]
    if raw_pattern then
        -- Parentheses keep the result to a single return value.
        return ( str._escapePattern( raw_pattern ) )
    end
    return str._error( 'No pattern string specified' )
end
456
457 --[[
458 count
459 This function counts the number of occurrences of one string in another.
460 ]]
-- Entry point for {{#invoke:String|count}}.
-- Parameters (named or positional, in this order):
--   source:  string to search, '' when missing
--   pattern: text or pattern to count, '' when missing
--   plain:   boolean-like flag; when true the pattern is matched literally.
--            Defaults to true only when the argument is absent.
-- Returns the number of substitutions mw.ustring.gsub reports when replacing
-- the pattern with '' in the source.
function str.count( frame )
    local args = str._getParameters( frame.args, { 'source', 'pattern', 'plain' } )
    local source = args.source or ''
    local pattern = args.pattern or ''

    -- Explicit nil check: `args.plain or true` would turn a boolean false,
    -- as a Lua caller might pass, into true.
    local plain = args.plain
    if plain == nil then
        plain = true
    end
    plain = str._getBoolean( plain )

    if plain then
        pattern = str._escapePattern( pattern )
    end

    -- The second return value of gsub is the substitution count.
    local _, count = mw.ustring.gsub( source, pattern, '' )
    return count
end
472
473 --[[
474 endswith
475 This function determines whether a string ends with another string.
476 ]]
-- Entry point for {{#invoke:String|endswith}}.
-- Compares the tail of 'source' with 'pattern' (named or positional, in that
-- order) as plain text. Returns "yes" when the source ends with the pattern
-- or the pattern is empty, and "" otherwise.
function str.endswith( frame )
    local params = str._getParameters( frame.args, { 'source', 'pattern' } )
    local text = params.source or ''
    local suffix = params.pattern or ''

    -- Every string ends with the empty string.
    if suffix == '' then
        return "yes"
    end

    -- Take as many trailing characters as the suffix has and compare.
    local tail = mw.ustring.sub( text, -mw.ustring.len( suffix ), -1 )
    if tail == suffix then
        return "yes"
    end
    return ""
end
491
492 --[[
493 join
494
495 Join all non empty arguments together; the first argument is the separator.
496 Usage:
497 {{#invoke:String|join|sep|one|two|three}}
498 ]]
-- Entry point for {{#invoke:String|join|sep|one|two|...}}.
-- The first positional argument is the separator. Every later positional
-- argument that is not the empty string is joined with it. Returns the
-- joined text ('' when nothing is left to join).
function str.join( frame )
    local pieces = {}
    local separator

    for _, value in ipairs( frame.args ) do
        if not separator then
            -- No separator captured yet: this value becomes the separator.
            separator = value
        elseif value ~= '' then
            pieces[#pieces + 1] = value
        end
    end

    return table.concat( pieces, separator or '' )
end
513
514 --[[
515 Helper function that populates the argument list given that user may need to use a mix of
516 named and unnamed parameters. This is relevant because named parameters are not
517 identical to unnamed parameters due to string trimming, and when dealing with strings
518 we sometimes want to either preserve or remove that whitespace depending on the application.
519 ]]
-- Builds a table keyed by the names in arg_list from a mix of named and
-- positional arguments.
--   frame_args: table of arguments, indexed by name and/or position
--   arg_list:   ordered list of parameter names
-- For each name the named argument wins. When it is nil, the next unused
-- positional argument is taken instead, so positional slots are consumed
-- only by names that were not given explicitly.
-- Returns the new table; names with no value stay nil.
function str._getParameters( frame_args, arg_list )
    local resolved = {}
    local next_positional = 1

    for _, name in ipairs( arg_list ) do
        local value = frame_args[name]
        if value == nil then
            value = frame_args[next_positional]
            next_positional = next_positional + 1
        end
        resolved[name] = value
    end

    return resolved
end
536
537 --[[
538 Helper function to handle error messages.
539 ]]
-- Formats an error message for output.
-- Options are read from the arguments of the frame returned by
-- mw.getCurrentFrame():
--   ignore_errors:  when it evaluates to true, '' is returned instead
--   error_category: category to attach, default
--                   'Errors reported by Module String'
--   no_category:    when it evaluates to true, no category is attached
-- Returns the message wrapped in <strong class="error">, prefixed with a
-- category link unless the category is empty or suppressed.
function str._error( error_str )
    local options = mw.getCurrentFrame().args
    local error_category = options.error_category or 'Errors reported by Module String'
    local ignore_errors = options.ignore_errors or false
    local no_category = options.no_category or false

    if str._getBoolean( ignore_errors ) then
        return ''
    end

    local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'
    if error_category ~= '' and not str._getBoolean( no_category ) then
        message = '[[Category:' .. error_category .. ']]' .. message
    end

    return message
end
557
558 --[[
559 Helper Function to interpret boolean strings
560 ]]
-- Interprets a boolean-like value.
--   boolean_str: a boolean, or a string compared case-insensitively
-- Booleans are returned as they are. The strings 'false', 'no', '0' and ''
-- give false, and any other string gives true. Any other type raises the
-- error 'No boolean value found'.
function str._getBoolean( boolean_str )
    local kind = type( boolean_str )

    if kind == 'string' then
        local lowered = boolean_str:lower()
        local is_false = lowered == 'false'
            or lowered == 'no'
            or lowered == '0'
            or lowered == ''
        return not is_false
    end

    if kind == 'boolean' then
        return boolean_str
    end

    error( 'No boolean value found' )
end
579
580 --[[
581 Helper function that escapes all pattern characters so that they will be treated
582 as plain text.
583 ]]
-- Prefixes each of the pattern characters ( ) . % + - * ? [ ^ $ ] in
-- pattern_str with '%' so the string matches as plain text.
-- Returns only the escaped string; the substitution count is dropped.
function str._escapePattern( pattern_str )
    local escaped = string.gsub( pattern_str, "[%(%)%.%%%+%-%*%?%[%^%$%]]", "%%%0" )
    return escaped
end
587
-- Hand the table of string functions back to whoever loaded this module.
return str