[杂谈]使用 GB2312 编码转义URL字符串

【更新】

2016.11.12

QString 新增了 UrlEncode 函数,优化了性能,速度比官方的版本快一点,更重要的是既可以使用 ANSI 或 UTF-8 编码,也可以使用自己喜欢的其他编码。

// UrlEncode overloads (declarations only; the implementations are not shown
// here). Each returns the URL-encoded text as a QStringW.
// NOTE(review): ASpacesAsPlus presumably emits a space as '+' rather than
// '%20', as SpacesAsPlus does in AnsiURLEncode - confirm in the implementation.

// Raw-pointer overload: for callers that have already converted the text to
// bytes in an encoding of their own choice.
// NOTE(review): l is presumably the number of bytes available at ABytes - confirm.
function UrlEncode(const ABytes: PByte; l: Integer; ASpacesAsPlus: Boolean)
  : QStringW; overload;
// Byte-array overload: same purpose as the pointer overload, taking the
// pre-encoded bytes as a TBytes.
function UrlEncode(const ABytes: TBytes; ASpacesAsPlus: Boolean)
  : QStringW; overload;
// String overload: per the accompanying notes, S is encoded as UTF-8 when
// AUtf8Encode is True (the default) and as ANSI otherwise.
function UrlEncode(const S: QStringW; ASpacesAsPlus: Boolean;
  AUtf8Encode: Boolean = True): QStringW; overload;

共三个重载:前两个接收已经编码好的字节数据,方便使用自定义的编码;第三个重载接收字符串,AUtf8Encode 为 True 时使用 UTF-8 编码,否则使用 ANSI 编码来处理。

 

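实际上这个代码是直接改的系统的 TURI.URLEncode(System.Net.URLClient 单元),只是将默认的 UTF-8 编码更换为了 ANSI。注意:TEncoding.ANSI 取的是系统当前的 ANSI 代码页,只有在简体中文(代码页 936)系统上,结果才是 GB2312/GBK 编码。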

// Percent-encodes AValue for use in a URL, after converting it to bytes with
// the system ANSI code page (TEncoding.ANSI).
//
//   AValue       - text to encode.
//   SpacesAsPlus - when True a space becomes '+'; otherwise it becomes '%20'.
//
// Returns the encoded string. Bytes in the RFC 3986 "unreserved" set
// (A-Z, a-z, 0-9, '-', '_', '.', '~') are copied as-is; an existing
// '%' + two hex digits sequence is passed through unchanged; every other byte
// is written as '%' followed by two upper-case hex digits.
function AnsiURLEncode(const AValue: string; SpacesAsPlus: Boolean): string;
const
  // from http://www.faqs.org/rfcs/rfc3986.html
  // URL safe chars = ['A' .. 'Z', 'a' .. 'z', '0' .. '9', '-', '_', '.', '~'];
  UnreservedBytes: set of Byte = [Ord('A') .. Ord('Z'), Ord('a') .. Ord('z'),
    Ord('0') .. Ord('9'), Ord('-'), Ord('_'), Ord('.'), Ord('~')];

  // Bytes that are valid hexadecimal digits (either case).
  HexDigitBytes: set of Byte = [Ord('0') .. Ord('9'), Ord('a') .. Ord('f'),
    Ord('A') .. Ord('F')];

  HexUpper: array [0 .. 15] of Char = ('0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F');

  // Builds the three-character escape "%XY" for a single byte.
  function PercentEscape(B: Byte): string;
  begin
    Result := '%' + HexUpper[(B shr 4) and $0F] + HexUpper[B and $0F];
  end;

var
  Raw: TBytes;
  Idx, Count: Integer;
  Cur: Byte;
  IsEscape: Boolean;
begin
  Raw := TEncoding.ANSI.GetBytes(AValue);
  Count := Length(Raw);
  Result := '';
  Idx := 0;
  while Idx < Count do
  begin
    Cur := Raw[Idx];

    // An already-escaped "%XX" triple is copied verbatim so it is not
    // escaped a second time. The test is done in two steps so the look-ahead
    // reads never happen unless both following bytes exist.
    IsEscape := False;
    if (Cur = Ord('%')) and (Idx + 2 < Count) then
      IsEscape := (Raw[Idx + 1] in HexDigitBytes) and
        (Raw[Idx + 2] in HexDigitBytes);

    if IsEscape then
    begin
      Result := Result + '%' + Char(Raw[Idx + 1]) + Char(Raw[Idx + 2]);
      Inc(Idx, 3);
    end
    else
    begin
      if Cur in UnreservedBytes then
        Result := Result + Char(Cur)
      else if (Cur = Ord(' ')) and SpacesAsPlus then
        Result := Result + '+'
      else
        Result := Result + PercentEscape(Cur);
      Inc(Idx);
    end;
  end;
end;

 

分享到: