Unit CastleStringUtils

Description

String utilities. Also some operations on chars and PChars. And various conversions strings<->numbers.

General comments for all procedures that have parameter like IgnoreCase:

  • If such parameter has some default value, this default value should be

    True

    for procedures that only read processed string

    False

    for procedures that can modify processed string (for safety, so that accidental modification should be harder)

  • If I don't write in docs for this procedure whether this procedure takes current locale into account (as current locale can change the meaning of "ignoring case"), then it means it does take current locale into account.

Uses

Overview

Classes, Interfaces, Objects and Records

Name Description
Class TStringsHelper  
Class TCastleStringList List of strings.
Class TStringStringMap String-to-string map.
Class EDeformatError  
Record TPercentReplace  
Class EUnknownPercentFormat  
Class EInvalidChar  

Functions and Procedures

function RandomString: string;
procedure StringReplaceAllVar(var S: string; const FromPattern, ToPattern: string; IgnoreCase: boolean = true); overload;
function BreakLine(const s: string; const MaxCol: integer; const AllowedBreakChars: TSetOfChars = WhiteSpaces): string; overload;
function BreakLine(const s: string; const MaxCol: integer; const AllowedBreakChars: TSetOfChars; const Newline: string; const Indent: string): string; overload;
function SDeleteChars(const s: string; const excludedChars: TSetOfChars): string;
function SReplaceChars(const s, FromChars, ToChars: string): string; overload;
function SReplaceChars(const s: string; FromChars: TSetOfChars; ToChar: char): string; overload;
function SReplaceChars(const s: string; FromChar, ToChar: char): string; overload;
function SPad(const s: string; len: integer; c: char = ' '): string; overload;
function SZeroPad(const s: string; len: integer): string;
function LoCase(c: char): char;
function CharPos(c: char; const s: string; Offset: Integer = 1): integer; deprecated 'use SysUtils.Pos or StrUtils.PosEx instead';
function CharsPos(const chars: TSetOfChars; const s: string): integer;
function CharsPosEx(const chars: TSetOfChars; const s: string; Offset: Integer): integer;
function BackCharsPos(const chars: TSetOfChars; const s: string): integer;
function BackPos(const SubString, S: string): Integer; overload;
function BackPos(const SubString: char; const S: string): Integer; overload;
function FirstDelimiter(const Delimiters, S: string): Integer;
function SEnding(const s: string; P: integer): string;
function IsPrefix(const Prefix, S: string; IgnoreCase: boolean = true): boolean; overload;
function IsSuffix(const Suffix, S: string; IgnoreCase: boolean = true): boolean; overload;
function PrefixRemove(const Prefix, S: string; IgnoreCase: boolean): string;
function SuffixRemove(const Suffix, S: string; IgnoreCase: boolean): string;
procedure SAppendData(var s: string; const Data; DataSize: integer); deprecated 'this function is not very useful';
function SChar(const s: string; CharNum: integer): PChar; deprecated 'this function is not very useful';
function SCharIs(const s: string; index: integer; c: char): boolean; overload;
function SCharIs(const s: string; index: integer; const chars: TSetOfChars): boolean; overload;
function SReadableForm(const s: string): string; overload;
function SReadableForm(const C: char): string; overload;
function CopyPos(const s: string; StartPosition, EndPosition: integer): string;
procedure DeletePos(var S: string; StartPosition, EndPosition: Integer);
function NextToken(const S: string; var SeekPos: Integer; const TokenDelims: TSetOfChars = WhiteSpaces): string;
function NextTokenOnce(const s: string; SeekPos: integer = 1; const TokenDelims: TSetOfChars = WhiteSpaces): string; overload;
function CreateTokens(const s: string; const TokenDelims: TSetOfChars = WhiteSpaces): TCastleStringList;
function SplitString(const S: string; const Delimiter: char): TCastleStringList;
function GlueStrings(const Strings: array of string; const Delimiter: char): string; overload;
function GlueStrings(const Strings: array of string; const Delimiter: string): string; overload;
function GlueStrings(const Strings: TStrings; const Delimiter: char): string; overload;
function GlueStrings(const Strings: TStrings; const Delimiter: string): string; overload;
function FindPos(const SubText, Text: string; StartPosition, Count: integer; const Options: TSearchOptions; const WordBorders: TSetOfChars = DefaultWordBorders): integer; deprecated 'use various StrUtils routines to search in string instead of this (slow, not much useful) routine';
function SRight(const s: string; const rpart: integer): string;
function SAppendPart(const s, PartSeparator, NextPart: string): string;
procedure DeFormat(Data: string; const Format: string; const args: array of pointer; const IgnoreCase: boolean = true; const RelaxedWhitespaceChecking: boolean = true); overload;
function TryDeFormat(Data: string; const Format: string; const args: array of pointer; const IgnoreCase: boolean = true; const RelaxedWhitespaceChecking: boolean = true): integer; overload;
procedure GetFileFilterExts(const FileFilter: string; Extensions: TStringList); deprecated 'use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string';
function GetFileFilterName(const FileFilter: string): string; deprecated 'use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string';
function GetFileFilterExtsStr(const FileFilter: string): string; deprecated 'use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string';
function SReplacePatterns(const s: string; const patterns, values: array of string; const IgnoreCase: boolean): string; overload;
function SReplacePatterns(const s: string; const patterns, values: TStrings; const IgnoreCase: boolean): string; overload;
function SReplacePatterns(const s: string; const Parameters: TStringStringMap; const IgnoreCase: boolean): string; overload;
function SCharsCount(const s: string; c: char): Cardinal; overload;
function SCharsCount(const s: string; const Chars: TSetOfChars): Cardinal; overload;
function STruncateHash(const s: string): string;
function SUnformattable(const s: string): string;
function SAnsiCompare(const s1, s2: string; IgnoreCase: boolean): Integer;
function SAnsiSame(const s1, s2: string; IgnoreCase: boolean): boolean;
function SPercentReplace(const InitialFormat: string; const Replaces: array of TPercentReplace; out ReplacementsDone: Cardinal; ErrorOnUnknownPercentFormat: boolean = true; PercentChar: char ='%'; IgnoreCase: boolean = false): string; overload; deprecated 'use standard StrUtils.StringsReplace instead';
function SPercentReplace(const InitialFormat: string; const Replaces: array of TPercentReplace; ErrorOnUnknownPercentFormat: boolean = true; PercentChar: char ='%'; IgnoreCase: boolean = false): string; overload; deprecated 'use standard StrUtils.StringsReplace instead';
function FormatNameCounter(const NamePattern: string; const Index: Integer; const AllowOldPercentSyntax: boolean; out ReplacementsDone: Cardinal): string; overload;
function FormatNameCounter(const NamePattern: string; const Index: Integer; const AllowOldPercentSyntax: boolean): string; overload;
function DigitAsChar(b: byte): char;
function DigitAsByte(c: char): byte;
function IntToStrZPad(n: integer; minLength: integer): string;
function IntToStrThousands(const Value: Int64; const Separator: char): string; overload;
function IntToStrThousands(const Value: Int64; const Separator: string): string; overload;
function IntToStrBase(const n: Int64; Base: Byte): string; overload;
function IntToStrBase( n: QWord; Base: Byte): string; overload;
function IntToStrBase(const n: Int64; Base: Byte; minLength: Cardinal): string; overload;
function IntToStrBase(const n: QWord; Base: Byte; minLength: Cardinal): string; overload;
function IntToStr2(n: Int64; const MinLength: Cardinal = 1; const ZeroDigit: char = '0'; const OneDigit: char = '1'; const MinusSign: char = '-'): string; overload;
function IntToStr16(const n: Int64; const minLength: Cardinal = 1): string; overload;
function IntToStr16(const n: QWord; const minLength: Cardinal = 1): string; overload;
function PointerToStr(Ptr: Pointer): string;
function Str2ToInt(const s: string): integer;
function StrHexToInt(const s: string): Int64;
function StrToFloatDef(const s: string; DefValue: Extended): Extended;
function SetToStr(const SetVariable; NumStart, NumEnd: byte): string;
function CharSetToStr(const SetVariable: TSetOfChars): string;
function PCharOrNil(const s: string): PChar;
function SCompressWhiteSpace(const S: string): string;
procedure SCheckChars(const S: string; const ValidChars: TSetOfChars; const RaiseExceptionOnError: boolean = true);
function TrimEndingNewline(const S: String): String;
function SizeToStr(const Value: QWord): String;

Types

TDynamicStringArray = array of string;
TSearchOptions = set of (soMatchCase, soWholeWord, soBackwards);
TSetOfChars = SysUtils.TSysCharSet;

Constants

AllChars = [Low(AnsiChar) .. High(AnsiChar)];
DefaultWordBorders = AllChars - ['a'..'z', 'A'..'Z', '0'..'9', '_'];
WhiteSpaces = [' ', #9, #10, #13];
SimpleAsciiCharacters = [#32 .. #126];
CtrlA = Chr(Ord('a') - Ord('a') + 1);
CtrlB = Chr(Ord('b') - Ord('a') + 1);
CtrlC = Chr(Ord('c') - Ord('a') + 1);
CtrlD = Chr(Ord('d') - Ord('a') + 1);
CtrlE = Chr(Ord('e') - Ord('a') + 1);
CtrlF = Chr(Ord('f') - Ord('a') + 1);
CtrlG = Chr(Ord('g') - Ord('a') + 1);
CtrlH = Chr(Ord('h') - Ord('a') + 1);
CtrlI = Chr(Ord('i') - Ord('a') + 1);
CtrlJ = Chr(Ord('j') - Ord('a') + 1);
CtrlK = Chr(Ord('k') - Ord('a') + 1);
CtrlL = Chr(Ord('l') - Ord('a') + 1);
CtrlM = Chr(Ord('m') - Ord('a') + 1);
CtrlN = Chr(Ord('n') - Ord('a') + 1);
CtrlO = Chr(Ord('o') - Ord('a') + 1);
CtrlP = Chr(Ord('p') - Ord('a') + 1);
CtrlQ = Chr(Ord('q') - Ord('a') + 1);
CtrlR = Chr(Ord('r') - Ord('a') + 1);
CtrlS = Chr(Ord('s') - Ord('a') + 1);
CtrlT = Chr(Ord('t') - Ord('a') + 1);
CtrlU = Chr(Ord('u') - Ord('a') + 1);
CtrlV = Chr(Ord('v') - Ord('a') + 1);
CtrlW = Chr(Ord('w') - Ord('a') + 1);
CtrlX = Chr(Ord('x') - Ord('a') + 1);
CtrlY = Chr(Ord('y') - Ord('a') + 1);
CtrlZ = Chr(Ord('z') - Ord('a') + 1);
CharBackSpace = #8;
CharTab = #9;
CharEnter = #13;
CharEscape = #27;
CharDelete = #127;

Description

Functions and Procedures

function RandomString: string;
 
procedure StringReplaceAllVar(var S: string; const FromPattern, ToPattern: string; IgnoreCase: boolean = true); overload;

Replace all occurrences of FromPattern string to ToPattern string, within another string S.

StringReplaceAllVar(s, from, to) is actually equivalent to simply s := StringReplace(s, from, to, [rfReplaceAll, rfIgnoreCase]). So StringReplaceAllVar is just a wrapper for very common use case of StringReplace.

function BreakLine(const s: string; const MaxCol: integer; const AllowedBreakChars: TSetOfChars = WhiteSpaces): string; overload;

Insert newline characters into string S, such that each line has at most MaxCol chars.

It tries to insert newline sequence at the last character in AllowedBreakChars but still before MaxCol limit, and the character in AllowedBreakChars is deleted in this case. In other words, in most typical situation it simply breaks the string where the whitespace is, trying to make the line as long as possible within MaxCol limit. If no such character in AllowedBreakChars is found (e.g., you put a long line of non-white characters), it will still break the string at MaxCol position (so in this exceptional case, it will cause a break in the middle of the word).

While breaking the string in the middle of a word is not nice, this gives us the guarantee that the string will always be broken into lines of at most MaxCol characters.

This intelligently recognizes already existing newline characters (#13, #10, #13#10 or #10#13) in the string, so e.g. it will not insert more newline characters when they are not necessary.

The Indent is added after every newline. This is a bit more powerful than simply specifying Newline parameter as NL + Indent, because this function also adds Indent after existing newlines in the input string.

function BreakLine(const s: string; const MaxCol: integer; const AllowedBreakChars: TSetOfChars; const Newline: string; const Indent: string): string; overload;
 
function SDeleteChars(const s: string; const excludedChars: TSetOfChars): string;

Returns S with all chars in ExcludedChars deleted.

function SReplaceChars(const s, FromChars, ToChars: string): string; overload;

Replace all occurrences of characters in FromChars with the new string / character. There are three overloaded versions:

  1. SReplaceChars(string, string, string) looks in S for characters within FromChars, and replaces them with characters on appropriate position in ToChars. For example, SReplaceChars(S, 'ab', 'cd') replaces all occurrences of 'a' into 'c' and all occurrences of 'b' into 'd'. It must always be Length(FromChars) <= Length(ToChars).

  2. SReplaceChars(string, TSetOfChars, char) replaces all occurrences of any character in given set with the one specified character.

  3. SReplaceChars(string, char, char) simply replaces all occurrences of one character into another.

function SReplaceChars(const s: string; FromChars: TSetOfChars; ToChar: char): string; overload;
 
function SReplaceChars(const s: string; FromChar, ToChar: char): string; overload;
 
function SPad(const s: string; len: integer; c: char = ' '): string; overload;

Pad (fill from the left with character C) string S, until length of resulting string is at least Len.

For example, SPad('29', 4, '0') gives '0029'

function SZeroPad(const s: string; len: integer): string;

Pad (fill from the left) with zeros string S, until length of resulting string is at least Len. It's actually just a shortcut for SPad with padding character set to '0'.

function LoCase(c: char): char;

Convert uppercase letters to lowercase. Analogous to UpCase. Doesn't change other characters. Just like UpCase, this doesn't take current locale into account, and works only on English A-Z -> a-z letters.

function CharPos(c: char; const s: string; Offset: Integer = 1): integer; deprecated 'use SysUtils.Pos or StrUtils.PosEx instead';

Warning: this symbol is deprecated: use SysUtils.Pos or StrUtils.PosEx instead

 
function CharsPos(const chars: TSetOfChars; const s: string): integer;

Find first occurrence of any character in Chars in string S. This is quite like FirstDelimiter but it takes parameter as TSetOfChars and has much more sensible name.

BackCharsPos does the same, but from the end of the string (i.e. finds the last occurrence).

CharsPosEx searches starting from Offset char.

They all return 0 if not found.

function CharsPosEx(const chars: TSetOfChars; const s: string; Offset: Integer): integer;
 
function BackCharsPos(const chars: TSetOfChars; const s: string): integer;
 
function BackPos(const SubString, S: string): Integer; overload;

Find last occurrence of SubString within S. 0 if not found. Overloaded version is optimized for searching for single character.

function BackPos(const SubString: char; const S: string): Integer; overload;
 
function FirstDelimiter(const Delimiters, S: string): Integer;

Find first occurrence of character in Delimiters. Name is analogous to LastDelimiter. Returns 0 if not found.

function SEnding(const s: string; P: integer): string;

Returns suffix of S starting from position P. Returns '' if P > length(S). Yes, this is simply equivalent to Copy(S, P, MaxInt).

function IsPrefix(const Prefix, S: string; IgnoreCase: boolean = true): boolean; overload;
 
function IsSuffix(const Suffix, S: string; IgnoreCase: boolean = true): boolean; overload;
 
function PrefixRemove(const Prefix, S: string; IgnoreCase: boolean): string;

Removes the prefix, if it is present. More precisely, if IsPrefix(Prefix, S, IgnoreCase) then returns S with this prefix removed. Else returns S.

function SuffixRemove(const Suffix, S: string; IgnoreCase: boolean): string;

Like PrefixRemove, but checks for and removes Suffix.

procedure SAppendData(var s: string; const Data; DataSize: integer); deprecated 'this function is not very useful';

Warning: this symbol is deprecated: this function is not very useful

Appends to a string S DataSize bytes from Data.

function SChar(const s: string; CharNum: integer): PChar; deprecated 'this function is not very useful';

Warning: this symbol is deprecated: this function is not very useful

A pointer to S[CharNum], that is just @S[CharNum], avoiding range checking.

function SCharIs(const s: string; index: integer; c: char): boolean; overload;

Check whether S[Index] = C, also checking whether Index is within S length. Return false if S is too short, or the character differs.

function SCharIs(const s: string; index: integer; const chars: TSetOfChars): boolean; overload;
 
function SReadableForm(const s: string): string; overload;

Replace typically unreadable characters in string S with #number notation. Useful for printing strings with some unprintable chars for debugging purposes.

function SReadableForm(const C: char): string; overload;
 
function CopyPos(const s: string; StartPosition, EndPosition: integer): string;

Return S[StartPosition..EndPosition]. This is similar to standard Copy procedure, but last parameter is EndPosition instead of Count, which is more comfortable sometimes.

procedure DeletePos(var S: string; StartPosition, EndPosition: Integer);

Delete from S range of characters [StartPosition..EndPosition]. Analogous to standard Delete but with EndPosition parameter (while standard Delete takes Count).

function NextToken(const S: string; var SeekPos: Integer; const TokenDelims: TSetOfChars = WhiteSpaces): string;

Find next part in the string S separated by delimiters TokenDelims. More precisely: search S, starting from position SeekPos, for the first character that is not in TokenDelims. Then, all subsequent characters that are not in TokenDelims are appended to the Result, until a character that is in TokenDelims is found. In effect, Result contains the whole part that was delimited by TokenDelims.

SeekPos is advanced to the position of the next character, i.e. the character right after the ending character that was in TokenDelims. In other words, SeekPos points to the position of the next "unprocessed" character in string S. Often you will want to make another call to NextToken, passing this SeekPos, and this way you can split your string S into parts delimited by TokenDelims.

Returns '' if no more tokens available (SeekPos value at the end is unspecified).

Typical use scenario (iterate over all tokens in the string) :

SeekPos := 1;
repeat
  Token := NextToken(S, SeekPos);
  if Token = '' then break;
  { ... process_next_token (Token) ... }
until false;

The above example will split the string into parts separated by whitespace.

Note: it's much easier to use CreateTokens instead of this procedure. But this procedure gives you quite more flexibility.

function NextTokenOnce(const s: string; SeekPos: integer = 1; const TokenDelims: TSetOfChars = WhiteSpaces): string; overload;

NextTokenOnce works just like NextToken, but doesn't advance the SeekPos position. This means that it's quite useless when you're interested in all tokens inside some string, but it's also more comfortable when you're interested in only one token inside some string. When SeekPos = 1, this is the first token.

function CreateTokens(const s: string; const TokenDelims: TSetOfChars = WhiteSpaces): TCastleStringList;

Returns TCastleStringList with tokens extracted from S. Token is something delimited by TokenDelims. TokenDelims are not contained in resulting items. E.g. CreateTokens('foo, bar', [' ', ',']) returns TCastleStringList with 2 items: 'foo' and 'bar'.

function SplitString(const S: string; const Delimiter: char): TCastleStringList;

Split a string by a character delimiter. For example, SplitString('foo|bar', '|') returns a list with 2 parts: 'foo' and 'bar'.

The splitting is done "strictly", which means that we always return exactly one more part than the occurrences of delimiter in the source string.

In particular, this means that:

  • If the Delimiter does not occur in the source string, then the result is a list with a single part. This applies even if the source string is empty.

    • SplitString('foo', '|') = ['foo']

    • SplitString('', '|') = ['']

  • If the Delimiter occurs two or more times in a row within the source string, then we will have one or more empty parts in the resulting list.

    • SplitString('foo||bar', '|') = ['foo', '', 'bar']

    • SplitString('foo|||bar', '|') = ['foo', '', '', 'bar']

  • If the Delimiter occurs at the very end of the source string, then the very last part of the resulting list will be an empty string.

    • SplitString('foo||bar|', '|') = ['foo', '', 'bar', '']

The reverse of this operation is GlueStrings.

See also CreateTokens, for a different way to split, that treats any sequence of delimiters like a single delimiter, and is more suitable e.g. to extract words separated by whitespace. See also standard TStringList.Delimiter feature.

function GlueStrings(const Strings: array of string; const Delimiter: char): string; overload;

Concatenate the string list with a given Delimiter. This is the reverse of SplitString.

function GlueStrings(const Strings: array of string; const Delimiter: string): string; overload;
 
function GlueStrings(const Strings: TStrings; const Delimiter: char): string; overload;
 
function GlueStrings(const Strings: TStrings; const Delimiter: string): string; overload;
 
function FindPos(const SubText, Text: string; StartPosition, Count: integer; const Options: TSearchOptions; const WordBorders: TSetOfChars = DefaultWordBorders): integer; deprecated 'use various StrUtils routines to search in string instead of this (slow, not much useful) routine';

Warning: this symbol is deprecated: use various StrUtils routines to search in string instead of this (slow, not much useful) routine

Find substring SubText within Text. Returns 0 if not found. Similar to a standard Pos function, with some improvements.

Parameters
StartPosition
Starts searching for SubText starting from this position. Note that the resulting position is still returned with respect to the string beginning. Just like standard PosEx.
Count
Looks only at Count characters from Text. You can say that the search is done only within Copy(Text, StartPosition, Count).
Options
Various searching options:

  • soMatchCase: makes searching case-sensitive (by default, case is ignored, taking locale into account).

  • soWholeWord: looks only for SubText occurrences surrounded by characters from WordBorders (or the beginning/end of Text).

    Note that, while the beginning/end of Text is always treated like a word border, the mere beginning/end of the searching range (StartPosition, Count) is not a word border. For example FindPos('cat', 'foocat dog', 4, MaxInt, [soWholeWord]) will answer 0 (not found), because the only 'cat' occurrence is not surrounded by default word borders.

  • soBackwards: search from the end, that is return rightmost found occurrence.

function SRight(const s: string; const rpart: integer): string;

Return rightmost RPart characters from S. If RPart > Length(S) then returns S.

function SAppendPart(const s, PartSeparator, NextPart: string): string;

If S = '' then returns NextPart, else returns S + PartSeparator + NextPart.

procedure DeFormat(Data: string; const Format: string; const args: array of pointer; const IgnoreCase: boolean = true; const RelaxedWhitespaceChecking: boolean = true); overload;

Parse a string according to the given format, returning the values corresponding to placeholders %x in format string.

Format parameter is a sequence of white spaces, placeholders like %d or %f, and other characters. More precisely:

  • If RelaxedWhitespaceChecking = True (that's the default value) then 1 or more white spaces in Format must correspond to 1 or more whitespace characters (of any kind) in Data. I.e., the actual number and kind of whitespace in Format and Data doesn't have to match — it's only important that some whitespace in Format corresponds to some whitespace in Data.

  • %d in Format means an Integer value in Data. Args should have a pointer to Integer variable on the appropriate position.

    Warning: DeFormat cannot detect the type of your arguments, or check their validity. Make sure in Args you use a pointer to an Integer variable (32-bit, like in FPC ObjFpc or Delphi mode), not e.g. ShortInt or Byte.

  • %f in Format means a float value (possibly signed, possibly with a dot) in Data. Args should have a pointer to Float variable on the appropriate position.

    Warning: DeFormat cannot detect the type of your arguments, or check their validity. Make sure in Args you use a pointer to a Float variable (as defined in Math unit), not e.g. Single or Double or Extended.

  • %.single., %.double., %.extended. are like %f, but they specify appropriate variable type in Args.

  • %.integer., %.cardinal., are like %d, but they specify appropriate variable type in Args.

  • %s in Format means a string (will end on the first whitespace) in Data. Args should contain a pointer to an AnsiString on the appropriate position. Note that I mean it — a pointer to an AnsiString, not just a string typecasted into a pointer. I.e., if S is AnsiString, Args should contain @S, not Pointer(S).

    Note that a string may be empty in some cases, e.g. Format = '%d %s' and Data = '123 ' will result in the empty string as second Args.

  • %% in Format means a single % sign in Data.

  • All the other characters (non-white, not %x sequences above) should be present in Data exactly like they are specified in Format. IgnoreCase controls whether the letter case is checked. When RelaxedWhitespaceChecking = False then white-space characters are treated just like non-white chars: they must match exactly between Format and Data.

Format must always match the whole Data — in other words, when we finished reading the Format, Data should be finished too. The exception is at the beginning and end of Data, if RelaxedWhitespaceChecking = True : then at the beginning and end of Data any number of white-space is allowed.

For DeFormat, the opposite must also be true: when we finished reading Data, Format should be finished too. However, for TryDeFormat, it's allowed for Data to end prematurely. TryDeFormat returns how many Args were initialized.

Note that while usually you will want RelaxedWhitespaceChecking = True, sometimes it can be needed to set this to False not only to get stricter checking, but also to get some things matching that otherwise wouldn't match. For example, consider Data = 'first  second apple' (note the two spaces after 'first') and Format = 'first %s second %s'. With RelaxedWhitespaceChecking = True these things do not match — because the 1st space character in the Format string "consumes" the 1st and 2nd space characters in the Data. Then '%s' is matched to the word 'second', and the word 'second' is compared with 'apple' and they do not match. If you want such Data and Format to match, you must pass RelaxedWhitespaceChecking = False. Then the first '%s' will be matched to '' (empty string).

This was written because both JclSscanf and scanf units were buggy. (see openGL.testy/nehe10).

Exceptions raised
EDeformatError
In case of any error — mismatch between Format and Data. Note that in case of error, some of Args may be initialized, and some not — no guarantees here, sorry.
function TryDeFormat(Data: string; const Format: string; const args: array of pointer; const IgnoreCase: boolean = true; const RelaxedWhitespaceChecking: boolean = true): integer; overload;
 
procedure GetFileFilterExts(const FileFilter: string; Extensions: TStringList); deprecated 'use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string';

Warning: this symbol is deprecated: use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string

Extract file extensions from a file filter, usually specified as a TOpenDialog.Filter value.

More precisely: expects FileFilter to be in the form of 'xxxx|name1.ext1;name2.ext2'. Where "xxxx" is just about anything (it is ignored), and in fact whole "xxxx|" (with bar) may be omitted. The rest (after "|") is treated as a filename list, separated by semicolon ";".

As Extensions contents, we set an array of all extensions extracted from these filenames. For example above, we would set Extensions to array with two items: ['.ext1', '.ext2'].

function GetFileFilterName(const FileFilter: string): string; deprecated 'use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string';

Warning: this symbol is deprecated: use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string

Extract file filter name from a file filter, usually specified as a TOpenDialog.Filter value.

More precisely: if we do not see bar "|" character, then this is the filter name. Otherwise, everything on the right of "|" is "extensions" and everything on the left is "filter name".

Additionally, if filter name ends with extensions value in parenthesis, they are removed. In other words, for 'Pascal files (*.pas)|*.pas', this will return just 'Pascal files'. The '(*.pas)' was removed from the filter name, because we detected this just repeats the extensions on the right of "|". Extensions on the right of "|" must be separated by semicolons, extensions within parenthesis on the left of "|" may be separated by semicolons ";" or commas ",".

function GetFileFilterExtsStr(const FileFilter: string): string; deprecated 'use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string';

Warning: this symbol is deprecated: use TFileFilter and TFileFilterList, and then you will not have to deconstruct your filters back from string

Search in FileFilter for the bar character "|", and return everything after it. This is a simple basis for GetFileFilterExts.

If no "|" found, we return an empty string (in other words, file filter without "|" is treated as just a filter name, without any extensions).

function SReplacePatterns(const s: string; const patterns, values: array of string; const IgnoreCase: boolean): string; overload;

Replace all strings in Patterns with corresponding strings in Values. This is similar to standard StringReplace, but this does many replacements at once. This is just like StrUtils.StringsReplace in FPC.

Patterns and Values arrays must have equal length. Patterns[0] will be replaced with Values[0], Patterns[1] with Values[1] etc. Patterns are scanned from left to right, that is if two pattern occurrences overlap — we will detect the leftmost one. If both patterns start at the same place (this means that one pattern is a prefix of the other), we will choose the first pattern in Patterns table.

Using this avoids a common trap at repeated search-replace operations. A naive implementation of doing many search-replace over the same string is like

Result := S;
Result := StringReplace(Result, Patterns[0], Values[0], [rfReplaceAll]);
Result := StringReplace(Result, Patterns[1], Values[1], [rfReplaceAll]);
// etc.

But the above fails badly when inserting some Values[] creates an occurrence of Pattern checked later. For example, when Values[0] contains inside whole Patterns[1]. More exotic situations involve when some Values[] glues with previous string contents to make a pattern detected later. This means that you could replace the same content many times, which is usually not what you want.

That's why you should instead use this function for such situations.

function SReplacePatterns(const s: string; const patterns, values: TStrings; const IgnoreCase: boolean): string; overload;
 
function SReplacePatterns(const s: string; const Parameters: TStringStringMap; const IgnoreCase: boolean): string; overload;
 
function SCharsCount(const s: string; c: char): Cardinal; overload;
 
function SCharsCount(const s: string; const Chars: TSetOfChars): Cardinal; overload;
 
function STruncateHash(const s: string): string;

Remove from the string S everything after the first hash "#" character. Removes also this very "#" character.

If string doesn't contain hash character, it's simply returned.

Useful for interpreting simple text files when you want to treat things after "#" like a comment.

function SUnformattable(const s: string): string;

Return a value that reproduces string S exactly when passed to the Format procedure. Simply put, this doubles the "%" characters inside the string. The intention is to produce a string such that Format(SUnformattable(S), []) = S. In other words, "quote" any suspicious "%" characters in S for Format.

function SAnsiCompare(const s1, s2: string; IgnoreCase: boolean): Integer;

Compare strings, taking into account current locale. This simply does AnsiCompareStr or AnsiCompareText, depending on IgnoreCase.

Returns value < 0 when S1 < S2, returns 0 when S1 = S2 and value > 0 when S1 > S2.

function SAnsiSame(const s1, s2: string; IgnoreCase: boolean): boolean;

Check if strings are equal, taking into account current locale. Shortcut for SAnsiCompare(S1, S2, IgnoreCase) = 0.

function SPercentReplace(const InitialFormat: string; const Replaces: array of TPercentReplace; out ReplacementsDone: Cardinal; ErrorOnUnknownPercentFormat: boolean = true; PercentChar: char ='%'; IgnoreCase: boolean = false): string; overload; deprecated 'use standard StrUtils.StringsReplace instead';

Warning: this symbol is deprecated: use standard StrUtils.StringsReplace instead

Searches for %x patterns and replaces them with specified strings. Something like a more generalized Format routine.

More precisely: every two-char sequence that starts with PercentChar and then is followed by one of Replaces[I].c characters is replaced with appropriate Replaces[i].s. Moreover, a pair of two PercentChar characters is replaced with one PercentChar character.

For example, assume that Replaces contains two items: (c: 'B'; s: '<bold>'), (c: 'b'; s: '</bold>'). Then SPercentReplace('100%% of cats are %Bcute%b', Replaces) will return string '100% of cats are <bold>cute</bold>'.

EUnknownPercentFormat is raised if we see a two-char sequence that starts with PercentChar and is followed by a character that is neither any Replaces[i].c nor PercentChar. Also, a single PercentChar at the end of the string is an error.

For example, assume that Replaces contains the same two items as previously. Following calls will result in EUnknownPercentFormat being raised: SPercentReplace('Unknown sequence %x', Replaces), SPercentReplace('Unterminated sequence %', Replaces).

If ErrorOnUnknownPercentFormat is False, then EUnknownPercentFormat will not be raised. Instead, incorrect sequence (like %x or unterminated % in examples above) will simply be left in the string.

Of course, replacing is done intelligently. Which means that e.g. sequence of four % characters will be correctly transformed into two % characters.

Note that IgnoreCase is used to match characters for Replaces[I].c. IgnoreCase is not used when it comes to comparing with PercentChar character, i.e. even when PercentChar will be set to some letter, it will always be compared in case-sensitive manner, regardless of IgnoreCase value.

It is undefined (meaning: don't do it) what happens if Replaces array contains more than once the same character C, or if any character C in Replaces array is equal to PercentChar.

ReplacementsDone, if passed, will return how many replacements were done. Not counting "meaningless" replacements of pair of PercentChar to one PercentChar (that is, we count only actual replacements from Replaces array).

Exceptions raised
EUnknownPercentFormat
In case of error in InitialFormat string, if ErrorOnUnknownPercentFormat is True.
function SPercentReplace(const InitialFormat: string; const Replaces: array of TPercentReplace; ErrorOnUnknownPercentFormat: boolean = true; PercentChar: char ='%'; IgnoreCase: boolean = false): string; overload; deprecated 'use standard StrUtils.StringsReplace instead';

Warning: this symbol is deprecated: use standard StrUtils.StringsReplace instead

 
function FormatNameCounter(const NamePattern: string; const Index: Integer; const AllowOldPercentSyntax: boolean; out ReplacementsDone: Cardinal): string; overload;

Replace sequences @counter(<padding>) in the NamePattern with Index. Any sequence @counter(<padding>) is detected (where <padding> is any integer >= 0) and replaced with Index padded with zeros (to given <padding> length).

If AllowOldPercentSyntax is True then we also allow older deprecated syntax: replace %d in the NamePattern with Index. This is used only if @counter(<padding>) was not found in NamePattern.

  • %d is replaced with Index.

    You can insert a non-negative number between % and d, to pad the counter with zeros to desired length. For example, with Index = 2, %d is replaced with just "2", %2d is replaced with "02", %4d is replaced with "0002".

  • %% is replaced with single percent char %.

  • Everything else is just copied to resulting string. Not recognized %-patterns are also just copied. The main purpose of this is to specify filenames with optional placeholders, so unrecognized stuff should be gracefully ignored.

The percent syntax was deprecated as it cannot be used with URLs. Inside URLs, percent character must always be encoded as %25. Sequence like %4d must mean letter "M" (ASCII 77, which is 4d in hexadecimal) inside URL. We could potentially allow syntax like %25d or %254d (4-digit counter), but that's just ugly, and compatibility had to be broken anyway (after Castle Game Engine 4.0.1, you have to fix URLs to image sequences anyway, as %4d must mean letter "M").

See https://castle-engine.io/x3d_extensions.php#section_ext_movie_from_image_sequence for an example when this is useful.

function FormatNameCounter(const NamePattern: string; const Index: Integer; const AllowOldPercentSyntax: boolean): string; overload;
 
function DigitAsChar(b: byte): char;

Convert digit (like number 0) to character (like '0'). Use only for arguments within 0..9 range.

function DigitAsByte(c: char): byte;

Convert digit character (like '0') to a number (like 0). Use only for characters in '0'...'9' range.

function IntToStrZPad(n: integer; minLength: integer): string;

Convert integer to string, padding string with zeros if needed.

function IntToStrThousands(const Value: Int64; const Separator: char): string; overload;

Convert integer to string, inserting additional Separator to visually delimit thousands, millions etc.

function IntToStrThousands(const Value: Int64; const Separator: string): string; overload;
 
function IntToStrBase(const n: Int64; Base: Byte): string; overload;

Convert integer to string, in base-Base (like base-16) numeral system. For digits above '9', we will use uppercase letters 'A', 'B'... etc. That's also why Base cannot be larger than 'Z'-'A' + 1 + 10 (we would not have enough digits then).

Overloaded versions with MinLength pad result with zeros to have at least MinLength.

This is similar to the standard StrUtils.Dec2Numb, but it can handle negative numbers without any problems (adds a minus sign at the beginning then).

function IntToStrBase( n: QWord; Base: Byte): string; overload;
 
function IntToStrBase(const n: Int64; Base: Byte; minLength: Cardinal): string; overload;
 
function IntToStrBase(const n: QWord; Base: Byte; minLength: Cardinal): string; overload;
 
function IntToStr2(n: Int64; const MinLength: Cardinal = 1; const ZeroDigit: char = '0'; const OneDigit: char = '1'; const MinusSign: char = '-'): string; overload;

Convert integer to binary (base-2 numeral system). MinLength means to left-pad result with zeros if necessary.

This is similar to the standard StrUtils.IntToBin function, but this works in an obvious way for negative numbers (adds a minus sign at the beginning).

function IntToStr16(const n: Int64; const minLength: Cardinal = 1): string; overload;

Convert integer to hexadecimal (base-16 numeric system).

This is similar to the standard SysUtils.IntToHex function, but this works in an obvious way for negative numbers (adds a minus sign at the beginning). Contrast this with SysUtils.IntToHex, that instead shows the value of negative integer typecast to unsigned (so it's in two's complement). So IntToHex(-1, 1) = 'FFFFFFFF', while IntToStr16(-1) = '-1'.

function IntToStr16(const n: QWord; const minLength: Cardinal = 1): string; overload;
 
function PointerToStr(Ptr: Pointer): string;

Returns Ptr as 0xXXX... hexadecimal value. "0x" is not a Pascal standard for coding hex values, but it's so popular that users are more likely to "get" 0x notation.

function Str2ToInt(const s: string): integer;

Convert string representing binary number to an integer. String must contain only '0', '1' (digits) and start with an optional sign (+ or -).

This is similar to the standard StrUtils.Numb2Dec (with Base = 2) function, but this reliably raises EConvertError in case of trouble.

Exceptions raised
EConvertError
In case of invalid string.
function StrHexToInt(const s: string): Int64;

Convert string with hexadecimal number to an integer. String must contain only hexadecimal digits (0-9, a-f, A-F), with an optional sign (+ or -) at the beginning.

This is similar to the standard StrUtils.Hex2Dec function, but it returns an Int64 value. So this is safer and more consistent with standard StrToInt.

Exceptions raised
EConvertError
In case of invalid string.
function StrToFloatDef(const s: string; DefValue: Extended): Extended;
 
function SetToStr(const SetVariable; NumStart, NumEnd: byte): string;

Convert a set to a string representation, in somewhat hacky way. This assumes that given SetVariable is a set value, and the set type is "set of [NumStart .. NumEnd]".

Implementation is heavily dependent on how the sets are internally stored. For now, we depend that a set of [NumStart .. NumEnd] behaves like a set of Byte, shifted to the left (i.e., NumStart corresponds to a 0 in set of Byte). This is not necessarily true! For example in Delphi 5 (as far as I remember — I don't have this Delphi now, and I don't remember on which Delphi version I observed this) set of 1..16 uses first three bytes, and the first bit (that would correspond to 0) is simply wasted. In fact, SizeOf such set is still 4, which means that internally sets eat 4 bytes anyway. But SizeOf set 200..216 is also 4, which means that the compiler is smart and doesn't waste too much space to store only 17 bits.

This all is not a rant on internal set handling by Delphi. On the contrary, Delphi does it for speed reasons, and that's very good. This is just a warning that SetToStr is not really reliable, and you may need to experiment a little with NumStart / NumEnd values to get sensible results. Although if your set is like "set of [0 ... something]", this should usually work OK.

Still: this function should be used only for debug purposes. Don't depend on it working 100% correctly always — it can't, because we can't depend on how compiler stores sets.

function CharSetToStr(const SetVariable: TSetOfChars): string;
 
function PCharOrNil(const s: string): PChar;

PCharOrNil simply returns a Pointer(S), you can think of it as a NO-OP. If string is empty, this returns Nil, otherwise it works just like PChar(S): returns a Pointer(S) with appropriate type cast.

function SCompressWhiteSpace(const S: string): string;

Replace any number of consecutive whitespace (including newlines) with a single whitespace. This is nice when you have a string (possibly multiline) supplied by user, and you want to use this for some UI item (like window's caption or menu item) — this "sanitizes" whitespace inside such string.

procedure SCheckChars(const S: string; const ValidChars: TSetOfChars; const RaiseExceptionOnError: boolean = true);

Check that all characters are within a given set. Raise exception otherwise (if RaiseExceptionOnError, default) or make a warning.

Exceptions raised
EInvalidChar
If string contains an invalid character and RaiseExceptionOnError = True. The exception string is informative, containing the string value, character, character position.
function TrimEndingNewline(const S: String): String;

Remove one newline from the end of the string, if any.

function SizeToStr(const Value: QWord): String;
 

Types

TDynamicStringArray = array of string;
 
TSearchOptions = set of (soMatchCase, soWholeWord, soBackwards);
 
TSetOfChars = SysUtils.TSysCharSet;

A set of chars.

Constants

AllChars = [Low(AnsiChar) .. High(AnsiChar)];
 
DefaultWordBorders = AllChars - ['a'..'z', 'A'..'Z', '0'..'9', '_'];
 
WhiteSpaces = [' ', #9, #10, #13];
 
SimpleAsciiCharacters = [#32 .. #126];
 
CtrlA = Chr(Ord('a') - Ord('a') + 1);
 
CtrlB = Chr(Ord('b') - Ord('a') + 1);
 
CtrlC = Chr(Ord('c') - Ord('a') + 1);
 
CtrlD = Chr(Ord('d') - Ord('a') + 1);
 
CtrlE = Chr(Ord('e') - Ord('a') + 1);
 
CtrlF = Chr(Ord('f') - Ord('a') + 1);
 
CtrlG = Chr(Ord('g') - Ord('a') + 1);
 
CtrlH = Chr(Ord('h') - Ord('a') + 1);
 
CtrlI = Chr(Ord('i') - Ord('a') + 1);
 
CtrlJ = Chr(Ord('j') - Ord('a') + 1);
 
CtrlK = Chr(Ord('k') - Ord('a') + 1);
 
CtrlL = Chr(Ord('l') - Ord('a') + 1);
 
CtrlM = Chr(Ord('m') - Ord('a') + 1);
 
CtrlN = Chr(Ord('n') - Ord('a') + 1);
 
CtrlO = Chr(Ord('o') - Ord('a') + 1);
 
CtrlP = Chr(Ord('p') - Ord('a') + 1);
 
CtrlQ = Chr(Ord('q') - Ord('a') + 1);
 
CtrlR = Chr(Ord('r') - Ord('a') + 1);
 
CtrlS = Chr(Ord('s') - Ord('a') + 1);
 
CtrlT = Chr(Ord('t') - Ord('a') + 1);
 
CtrlU = Chr(Ord('u') - Ord('a') + 1);
 
CtrlV = Chr(Ord('v') - Ord('a') + 1);
 
CtrlW = Chr(Ord('w') - Ord('a') + 1);
 
CtrlX = Chr(Ord('x') - Ord('a') + 1);
 
CtrlY = Chr(Ord('y') - Ord('a') + 1);
 
CtrlZ = Chr(Ord('z') - Ord('a') + 1);
 
CharBackSpace = #8;
 
CharTab = #9;
 
CharEnter = #13;
 
CharEscape = #27;
 
CharDelete = #127;
 

Generated by PasDoc 0.15.0.