1
votes

I've got the following code, and need to strip all non alpha numeric characters. It's not working in delphi 2009

unit Unit2;
//Used information from
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls;
Type
     TExplodeArray = Array Of String;

  TForm2 = class(TForm)
    Memo1: TMemo;
    ListBox1: TListBox;
    Button1: TButton;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
    Function Explode ( Const cSeparator, vString : String ) : TExplodeArray;
    Function Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
    Function StripHTML ( S : String ) : String;
    function allwords(data:string):integer;
  end;

var
  Form2: TForm2;
  allword, allphrase: TExplodeArray;

implementation
{$R *.dfm}
Function TForm2.StripHTML ( S : String ) : String;
Var
     TagBegin, TagEnd, TagLength : Integer;
Begin
     TagBegin := Pos ( '<', S );      // search position of first <

     While ( TagBegin > 0 ) Do
          Begin  // while there is a < in S
          TagEnd := Pos ( '>', S );              // find the matching >
          TagLength := TagEnd - TagBegin + 1;
          Delete ( S, TagBegin, TagLength );     // delete the tag
          TagBegin := Pos ( '<', S );            // search for next <
          End;

     Result := S;                   // give the result
End;
Function TForm2.Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Var
     i : Integer;
Begin
     Result := '';
     For i := 0 To Length ( cArray ) - 1 Do
          Begin
          Result := Result + cSeparator + cArray [i];
          End;
     System.Delete ( Result, 1, Length ( cSeparator ) );
End;

Function TForm2.Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Var
     i : Integer;
     S : String;
Begin
     S := vString;
     SetLength ( Result, 0 );
     i := 0;
     While Pos ( cSeparator, S ) > 0 Do
          Begin
          SetLength ( Result, Length ( Result ) + 1 );
          Result[i] := Copy ( S, 1, Pos ( cSeparator, S ) - 1 );
          Inc ( i );
          S := Copy ( S, Pos ( cSeparator, S ) + Length ( cSeparator ), Length ( S ) );
          End;
     SetLength ( Result, Length ( Result ) + 1 );
     Result[i] := Copy ( S, 1, Length ( S ) );
End;
//Copied from JclStrings
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
var
  Source, Dest: PChar;
begin
  SetLength(Result, Length(S));
  UniqueString(Result);
  Source := PChar(S);
  Dest := PChar(Result);
  while (Source <> nil) and (Source^ <> #0) do
  begin
    if Source^ in Chars then
    begin
      Dest^ := Source^;
      Inc(Dest);
    end;
    Inc(Source);
  end;
  SetLength(Result, (Longint(Dest) - Longint(PChar(Result))) div SizeOf(AnsiChar));
end;
function ReplaceNewlines(const AValue: string): string;
var
  SrcPtr, DestPtr: PChar;
begin
  SrcPtr := PChar(AValue);
  SetLength(Result, Length(AValue));
  DestPtr := PChar(Result);
  while SrcPtr <> {greater than less than} #0 do begin
    if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then begin
      DestPtr[0] := '\';
      DestPtr[1] := 't';
      Inc(SrcPtr);
      Inc(DestPtr);
    end else
      DestPtr[0] := SrcPtr[0];
    Inc(SrcPtr);
    Inc(DestPtr);
  end;
  SetLength(Result, DestPtr - PChar(Result));
end;
function StripNonAlphaNumeric(const AValue: string): string;
var
  SrcPtr, DestPtr: PChar;
begin
  SrcPtr := PChar(AValue);
  SetLength(Result, Length(AValue));
  DestPtr := PChar(Result);
  while SrcPtr <> #0 do begin
    if SrcPtr[0] in ['a'..'z', 'A'..'Z', '0'..'9'] then begin
      DestPtr[0] := SrcPtr[0];
      Inc(DestPtr);
    end;
    Inc(SrcPtr);
  end;
  SetLength(Result, DestPtr - PChar(Result));
end;
function TForm2.allwords(data:string):integer;
var i:integer;
begin
  listbox1.Items.add(data);
  data:= StripHTML ( data );
  listbox1.Items.add(data);
  //////////////////////////////////////////////////////////////
  data := StrKeepChars(data, ['A'..'Z', 'a'..'z', '0'..'9']);
  // Strips out everything data comes back blank in Delphi 2009
  //////////////////////////////////////////////////////////////
  listbox1.Items.add(data);
  data := stringreplace(data,'  ',' ', [rfReplaceAll, rfIgnoreCase] );
  //Replace two spaces with one.
  listbox1.Items.add(data);
  allword:= explode(' ',data);
 { // Converting the following PHP code to Delphi
    $text = ereg_replace("[^[:alnum:]]", " ", $text);
    while(strpos($text,'  ')!==false) $text = ereg_replace("  ", " ", $text);
    $text=$string=strtolower($text);
    $text=explode(" ",$text);
    return count($text);
}
 for I := 0 to Length(allword) - 1 do
 listbox1.Items.Add(allword[i]);
end;
procedure TForm2.Button1Click(Sender: TObject);
begin
//[^[:alnum:]]

allwords(memo1.Text);
end;

end.

How else would I go about doing this?

3

3 Answers

1
votes

Easiest solution that comes to mind is defining a regular expression that returns the input string minus any non-alpha characters in it.

1
votes

It's been a while since I did much with Delphi - version 5 was my playground.

Isn't one of the primary features of Delphi 2009 that it's now Unicode throughout, by default.

This has impact on anything that tries to process character by character. Could it be the source of your problem?

1
votes

Uses StrUtils; //StuffString

var
    Regex: TPerlRegEx;
  I:Integer;
begin
Regex := TPerlRegEx.Create(nil);
Regex.RegEx := '[^[:alnum:]]';
Regex.Options := [preMultiLine];
Regex.Subject := data;
if Regex.Match then begin
    repeat
    data := StuffString(data,Regex.MatchedExpressionOffset,Regex.MatchedExpressionLength,' ');
    until not Regex.MatchAgain;
end;