Can anyone help me get the coordinates of text in a PDF? Is this possible? I am writing a Windows Forms app in which the user types a word into a text box; the app reads an existing PDF using iTextSharp, highlights the matched words if any are found, and saves the PDF with the highlighted text. So far I have almost everything done, including drawing a yellow rectangle, but what is missing is how to get the coordinates of the matched text so that I can highlight it. Thanks in advance. (By the way: sb is the search text box, and tb is a rich text box where the PDF text is displayed.)
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using iTextSharp.text;
using System.Text.RegularExpressions;
namespace manipulatePDF
{
/// <summary>
/// Main form: opens a PDF, shows its extracted text in the rich text box <c>tb</c>,
/// and saves a copy of the PDF in which pages containing the search term from
/// <c>sb</c> receive a yellow highlight rectangle.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Opacity of the highlight fill. PDF fill opacity is a fraction in the range 0..1.</summary>
    private const float HighlightOpacity = 0.3f;

    // Path of the PDF selected in open_Click; null until a file has been opened.
    string oldFile;

    // Document used by the most recent save. It is replaced on every save because a
    // Document that has been closed cannot be opened again.
    Document document = new Document();

    // Extracted text of every page of the currently opened PDF, in page order.
    StringBuilder text = new StringBuilder();

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Lets the user pick a PDF, extracts the text of all its pages and shows it in <c>tb</c>.
    /// </summary>
    private void open_Click(object sender, EventArgs e)
    {
        reset_Click(sender, e);
        openFileDialog1.Filter = "PDF Files (.pdf)|*.pdf";
        openFileDialog1.FilterIndex = 1;
        if (openFileDialog1.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        label1.Text = "File Location: " + openFileDialog1.FileName;
        oldFile = openFileDialog1.FileName;

        PdfReader reader = new PdfReader(oldFile);
        try
        {
            // Start from an empty buffer so text from a previously opened file is not kept.
            text.Length = 0;
            for (int cPage = 1; cPage <= reader.NumberOfPages; cPage++)
            {
                text.Append(ExtractPageText(reader, cPage));
            }
        }
        finally
        {
            // Close only after every page has been read.
            reader.Close();
        }

        tb.Text = text.ToString();
    }

    /// <summary>
    /// Highlights the search term in <c>tb</c>, then writes a copy of the opened PDF to the
    /// file chosen by the user, marking each page on which the term occurs.
    /// </summary>
    private void save_Click(object sender, EventArgs e)
    {
        if (string.IsNullOrEmpty(oldFile))
        {
            MessageBox.Show("Open a PDF file before saving.");
            return;
        }

        saveFileDialog1.InitialDirectory = "C: ";
        saveFileDialog1.Title = "Save the PDF File";
        saveFileDialog1.Filter = "PDF files (*.pdf)|*.pdf";
        if (saveFileDialog1.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        string textToSearch = sb.Text;
        HighlightMatchesInTextBox(textToSearch);

        PdfReader reader = new PdfReader(oldFile);
        try
        {
            using (FileStream fs = new FileStream(saveFileDialog1.FileName, FileMode.Create, FileAccess.Write))
            {
                // A fresh Document per save, sized for the first page before it is opened.
                document = new Document(reader.GetPageSizeWithRotation(1));
                PdfWriter writer = PdfWriter.GetInstance(document, fs);
                document.Open();
                PdfContentByte cb = writer.DirectContent;

                PdfGState highlightState = new PdfGState();
                highlightState.FillOpacity = HighlightOpacity;

                int count = 0; // occurrences of the search term found so far, across pages
                for (int cPage = 1; cPage <= reader.NumberOfPages; cPage++)
                {
                    // Give every output page the size of its source page. NewPage is a
                    // no-op while the current page is still empty (i.e. for the first page).
                    iTextSharp.text.Rectangle pageSize = reader.GetPageSizeWithRotation(cPage);
                    document.SetPageSize(pageSize);
                    document.NewPage();

                    int pageHits = CountOccurrences(ExtractPageText(reader, cPage), textToSearch);
                    count += pageHits;
                    if (pageHits > 0)
                    {
                        DrawHighlightPlaceholder(cb, highlightState, pageSize);
                    }

                    if (count != 0)
                    {
                        if (contain.Checked == true)
                        {
                            label2.Text = "Number of pages: " + cPage + " - " + textToSearch + " found " + count + " times. \n";
                        }
                        else if (exact.Checked == true)
                        {
                            // TODO: count only matches bounded by a space or a dot (whole words)
                            // and report that number instead.
                        }
                    }

                    // Copy the source page onto the current output page.
                    PdfImportedPage page = writer.GetImportedPage(reader, cPage);
                    cb.AddTemplate(page, 0, 0);
                }

                // Closing the document finishes the PDF and flushes it to the stream.
                document.Close();
            }
        }
        finally
        {
            reader.Close();
        }
    }

    /// <summary>
    /// Clears the displayed text and the buffered PDF text.
    /// </summary>
    private void reset_Click(object sender, EventArgs e)
    {
        tb.Text = "";
        text.Length = 0;
    }

    /// <summary>
    /// Returns the text of one page using a simple extraction strategy.
    /// </summary>
    /// <param name="reader">Open reader for the PDF.</param>
    /// <param name="pageNumber">1-based page number.</param>
    private static string ExtractPageText(PdfReader reader, int pageNumber)
    {
        // The extracted string is used as-is: round-tripping it through the ANSI code
        // page would replace every character that code page cannot represent.
        ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
        return PdfTextExtractor.GetTextFromPage(reader, pageNumber, strategy);
    }

    /// <summary>
    /// Counts the (possibly overlapping) occurrences of <paramref name="term"/> in
    /// <paramref name="haystack"/>. An empty or null argument yields 0.
    /// </summary>
    private static int CountOccurrences(string haystack, string term)
    {
        if (string.IsNullOrEmpty(haystack) || string.IsNullOrEmpty(term))
        {
            return 0;
        }

        int hits = 0;
        int at = haystack.IndexOf(term, 0, StringComparison.CurrentCulture);
        while (at != -1)
        {
            hits++;
            if (at + 1 >= haystack.Length)
            {
                // Nothing left to search; also keeps the start index within bounds.
                break;
            }
            at = haystack.IndexOf(term, at + 1, StringComparison.CurrentCulture);
        }
        return hits;
    }

    /// <summary>
    /// Gives every occurrence of <paramref name="term"/> in <c>tb</c> a gold background.
    /// Uses a case-sensitive search when <c>contain</c> is checked and a whole-word search
    /// when <c>exact</c> is checked; does nothing when neither is checked or the term is empty.
    /// </summary>
    private void HighlightMatchesInTextBox(string term)
    {
        if (string.IsNullOrEmpty(term))
        {
            return;
        }

        RichTextBoxFinds options;
        if (contain.Checked)
        {
            options = RichTextBoxFinds.MatchCase;
        }
        else if (exact.Checked)
        {
            options = RichTextBoxFinds.WholeWord;
        }
        else
        {
            return;
        }

        int start = 0;
        while (start < tb.TextLength)
        {
            // Find selects the match and returns its position, or -1 when there is none.
            int hit = tb.Find(term, start, tb.TextLength, options);
            if (hit < 0)
            {
                break;
            }
            tb.SelectionBackColor = Color.Gold;
            // Continue from the position the control reported so the loop always advances.
            start = hit + 1;
        }
    }

    /// <summary>
    /// Fills a translucent yellow rectangle on the current page.
    /// </summary>
    /// <param name="cb">Direct content of the page being written.</param>
    /// <param name="highlightState">Graphics state carrying the fill opacity.</param>
    /// <param name="pageSize">Size of the current page.</param>
    private static void DrawHighlightPlaceholder(PdfContentByte cb, PdfGState highlightState, iTextSharp.text.Rectangle pageSize)
    {
        // NOTE(review): the rectangle position is a fixed placeholder, not the location of
        // the matched text. Real coordinates would presumably come from a custom text
        // extraction strategy that records the position of each rendered text chunk
        // (TextRenderInfo), or the highlight could be added as a markup annotation through
        // PdfStamper instead - confirm against the iTextSharp version in use.
        cb.SaveState(); // keep the opacity and colour from affecting the imported page
        cb.SetGState(highlightState);
        cb.SetColorFill(new CMYKColor(0f, 0f, 1f, 0f));
        cb.Rectangle(pageSize.Width - 500f, 600f, 100f, 100f);
        cb.Fill();
        cb.RestoreState();
    }
}