0
votes

I am trying to fill a Word document with data from an XML file. I am using Open XML to fill the document, which works great, and I save the result as .docx. The problem is that I then have to open Word, save the document as .odt, and use the OpenOffice SDK to open the document and save it as a PDF. When I don't save the .docx as .odt first, the formatting is off.

What I need is a way to either convert the .docx to .odt programmatically or save the document as .odt in the first place.

Here is what I have right now:

    /// <summary>
    /// Loads the cached XML, copies the report template into memory, round-trips the
    /// main document part's markup through a string, and writes the result to a new .docx file.
    /// </summary>
    static void Main()
        {
            // Load the XML document from the cache file.
            XmlDocument cacheXml = new XmlDocument();
            cacheXml.Load("C:\\Cache\\MMcache.xml");

            // Node lists for the fields of interest; nothing further down consumes them yet.
            XmlNodeList patientFirstNodes = cacheXml.GetElementsByTagName("PatientFirst");
            XmlNodeList patientSignatureNodes = cacheXml.GetElementsByTagName("PatientSignatureImg");

            // Edit an in-memory copy of the template so the template file itself is only read.
            byte[] templateBytes = File.ReadAllBytes("C:\\Cache\\TransportationRunReporttemplate.docx");
            using (MemoryStream buffer = new MemoryStream())
            {
                buffer.Write(templateBytes, 0, templateBytes.Length);

                using (WordprocessingDocument package = WordprocessingDocument.Open(buffer, true))
                {
                    var mainPart = package.MainDocumentPart;

                    // Pull the main part's markup out as text.
                    string documentText;
                    using (StreamReader partReader = new StreamReader(mainPart.GetStream()))
                    {
                        documentText = partReader.ReadToEnd();
                    }

                    // Recreate the part's stream and write the markup back as read.
                    using (StreamWriter partWriter = new StreamWriter(mainPart.GetStream(FileMode.Create)))
                    {
                        partWriter.Write(documentText);
                    }
                }

                // Save the in-memory package under the new file name.
                File.WriteAllBytes("C:\\Cache\\MYFINISHEDTEMPLATE.docx", buffer.ToArray());
            }
        }

        /// <summary>
        /// Replaces the picture inside the content control tagged "Signature" in the finished
        /// template with <paramref name="bitmap"/> (stored as a JPEG image part), then starts
        /// the PDF conversion.
        /// </summary>
        /// <param name="bitmap">Image to embed; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The document has no (or more than one) content control tagged "Signature", or that
        /// control does not contain a picture.
        /// </exception>
        private static void AddPicture(Bitmap bitmap)
        {
            if (bitmap == null)
                throw new ArgumentNullException("bitmap");

            using (WordprocessingDocument doc = WordprocessingDocument.Open("C:\\Cache\\MYFINISHEDTEMPLATE.docx", true))
            {
                // Content controls without properties or without a tag are skipped instead of
                // crashing the whole search with a NullReferenceException.
                SdtElement controlBlock = doc.MainDocumentPart.Document.Body
                    .Descendants<SdtElement>()
                    .Where(r =>
                    {
                        if (r.SdtProperties == null)
                            return false;
                        Tag tag = r.SdtProperties.GetFirstChild<Tag>();
                        return tag != null && tag.Val == "Signature";
                    })
                    .SingleOrDefault();
                if (controlBlock == null)
                    throw new InvalidOperationException("No content control tagged \"Signature\" was found in the document.");

                // Find the Blip element of the content control before touching the package,
                // so a missing picture does not leave an orphaned image part behind.
                A.Blip blip = controlBlock.Descendants<A.Blip>().FirstOrDefault();
                if (blip == null)
                    throw new InvalidOperationException("The \"Signature\" content control does not contain a picture.");

                ImagePart imagePart = doc.MainDocumentPart.AddImagePart(ImagePartType.Jpeg);
                using (MemoryStream stream = new MemoryStream())
                {
                    bitmap.Save(stream, ImageFormat.Jpeg);
                    stream.Position = 0; // rewind so FeedData reads from the start
                    imagePart.FeedData(stream);
                }

                // Point the existing picture at the new image part. The picture's extents
                // are left as they are; the image is not resized here.
                blip.Embed = doc.MainDocumentPart.GetIdOfPart(imagePart);
            }

            // NOTE(review): the document edited above is MYFINISHEDTEMPLATE.docx, but the
            // conversion reads MYFINISHEDTEMPLATE2.docx - confirm which file is intended.
            ConvertToPDF(@"C:\Cache\MYFINISHEDTEMPLATE2.docx",@"C:\Cache\OpenPdf.pdf");

        }






        /// <summary>
        /// Decodes a Base64-encoded image into a <see cref="Bitmap"/>.
        /// </summary>
        /// <param name="base64String">Base64 text of the encoded image bytes.</param>
        /// <returns>
        /// A bitmap that does not depend on any stream; the caller owns it and should dispose it.
        /// </returns>
        public static Bitmap Base64StringToBitmap(string base64String)
        {
            byte[] byteBuffer = Convert.FromBase64String(base64String);

            using (MemoryStream memoryStream = new MemoryStream(byteBuffer))
            using (Image decoded = Image.FromStream(memoryStream))
            {
                // An image created by FromStream needs its stream to stay open for as long
                // as the image lives. Copying into a new Bitmap breaks that dependency, so
                // the stream can be closed here without breaking later Save calls.
                return new Bitmap(decoded);
            }
        }
     /// <summary>
     /// Loads <paramref name="inputFile"/> through the OpenOffice component loader and
     /// exports it to <paramref name="outputFile"/> using the PDF filter chosen from the
     /// input file's extension.
     /// </summary>
     /// <param name="inputFile">Path of the document to convert.</param>
     /// <param name="outputFile">Path of the PDF to write.</param>
     /// <exception cref="InvalidProgramException">The input extension has no known export filter.</exception>
     /// <exception cref="InvalidOperationException">The loader returned no document.</exception>
     public static void ConvertToPDF(string inputFile, string outputFile)
        {
            if (ConvertExtensionToFilterType(System.IO.Path.GetExtension(inputFile)) == null)
                throw new InvalidProgramException("Unknown file type for OpenOffice. File = " + inputFile);

            StartOpenOffice();

            // Get a ComponentContext
            var xLocalContext = Bootstrap.bootstrap();
            // Get the MultiServiceFactory
            var xRemoteFactory = (XMultiServiceFactory)xLocalContext.getServiceManager();
            // Get a ComponentLoader
            var aLoader = (XComponentLoader)xRemoteFactory.createInstance("com.sun.star.frame.Desktop");

            XComponent xComponent = null;
            try
            {
                // Load the source file
                xComponent = InitDocument(aLoader, PathConverter(inputFile), "_blank");

                // The load call has already returned at this point, so a null result will
                // not change by waiting. Fail instead of sleeping forever.
                if (xComponent == null)
                    throw new InvalidOperationException("OpenOffice could not load the document. File = " + inputFile);

                // Save/export the document
                SaveDocument(xComponent, inputFile, PathConverter(outputFile));
            }
            finally
            {
                if (xComponent != null) xComponent.dispose();
            }

        }

        /// <summary>
        /// Makes sure an OpenOffice process is available: returns immediately when one is
        /// already running, otherwise launches "soffice.exe" with the headless arguments.
        /// </summary>
        /// <exception cref="InvalidProgramException">The process could not be started.</exception>
        private static void StartOpenOffice()
        {
            // GetProcessesByName takes the friendly process name, i.e. without ".exe";
            // with the extension included the lookup never matches.
            Process[] running = Process.GetProcessesByName("soffice");
            bool alreadyRunning = running.Length > 0;
            foreach (Process existing in running)
                existing.Dispose();

            if (alreadyRunning)
                return;

            // Disposing the Process object only releases the local handle; it does not
            // stop the launched application.
            using (var p = new Process
            {
                StartInfo =
                {
                    Arguments = "-headless -nofirststartwizard",
                    FileName = "soffice.exe",
                    CreateNoWindow = true
                }
            })
            {
                var result = p.Start();

                if (result == false)
                    throw new InvalidProgramException("OpenOffice failed to start.");
            }
        }

        /// <summary>
        /// Asks <paramref name="aLoader"/> to load the document at URL <paramref name="file"/>
        /// into frame <paramref name="target"/>, passing the "Hidden" load property set to true.
        /// </summary>
        /// <returns>Whatever the loader returns for the request.</returns>
        private static XComponent InitDocument(XComponentLoader aLoader, string file, string target)
        {
            PropertyValue hiddenFlag = new PropertyValue { Name = "Hidden", Value = new Any(true) };
            PropertyValue[] loadArguments = { hiddenFlag };

            return aLoader.loadComponentFromURL(file, target, 0, loadArguments);
        }


        /// <summary>
        /// Stores <paramref name="xComponent"/> to the URL <paramref name="destinationFile"/>,
        /// using the export filter derived from the extension of <paramref name="sourceFile"/>
        /// and with the "Overwrite" property set to true.
        /// </summary>
        private static void SaveDocument(XComponent xComponent, string sourceFile, string destinationFile)
        {
            // The filter is picked from the *source* extension.
            string filterName = ConvertExtensionToFilterType(System.IO.Path.GetExtension(sourceFile));

            PropertyValue[] storeArguments =
            {
                new PropertyValue { Name = "FilterName", Value = new Any(filterName) },
                new PropertyValue { Name = "Overwrite", Value = new Any(true) }
            };

            XStorable storable = (XStorable)xComponent;
            storable.storeToURL(destinationFile, storeArguments);
        }


        /// <summary>
        /// Turns a file path into a "file:///" URL by prefixing it and replacing every
        /// backslash with a forward slash.
        /// </summary>
        /// <param name="file">Path to convert; must be non-null and non-empty.</param>
        /// <returns>The path in "file:///..." form.</returns>
        /// <exception cref="NullReferenceException"><paramref name="file"/> is null or empty.</exception>
        private static string PathConverter(string file)
        {
            if (string.IsNullOrEmpty(file))
                throw new NullReferenceException("Null or empty path passed to OpenOffice");

            string forwardSlashed = file.Replace('\\', '/');
            return "file:///" + forwardSlashed;
        }

        /// <summary>
        /// Maps a file extension (with leading dot) to the name of the PDF export filter
        /// to use for it. The comparison ignores case, so ".DOCX" and ".docx" are equivalent.
        /// </summary>
        /// <param name="extension">Extension such as ".docx"; may be null.</param>
        /// <returns>
        /// "writer_pdf_Export", "calc_pdf_Export" or "impress_pdf_Export", or null when the
        /// extension is null or not recognised.
        /// </returns>
        public static string ConvertExtensionToFilterType(string extension)
        {
            if (extension == null)
                return null;

            // Normalise case so the extension's casing does not affect the lookup.
            switch (extension.ToLowerInvariant())
            {
                case ".odt":
                case ".doc":
                case ".docx":
                case ".txt":
                case ".rtf":
                case ".html":
                case ".htm":
                case ".xml":
                case ".wps":
                case ".wpd":
                    return "writer_pdf_Export";
                case ".xls":
                case ".xlsx": // listed alongside .xls/.xlsb, which were already handled
                case ".xlsb":
                case ".ods":
                    return "calc_pdf_Export";
                case ".ppt":
                case ".pptx":
                case ".odp":
                    return "impress_pdf_Export";

                default: return null;
            }
        }


    }

}

Just for information: I cannot use anything that relies on Interop, because Word will not be installed on the machine. I am using Open XML and OpenOffice.

2

2 Answers

0
votes

This is what I would try (details below): 1) try Doc format instead of DocX; 2) switch to LibreOffice and try DocX again; 3) use the odf-converter to get a better DocX -> ODT conversion.

More details...

There's an open-source tool called odf-converter that can convert DocX -> ODT, and it typically gives you a more accurate DocX -> ODT conversion than OpenOffice. Take a look at odf-converter-integrator by OONinja for pre-packaged versions.

Also, Libre Office has DocX support ahead of OpenOffice so you might get a better result simply by switching to Libre Office.

A further option is to start from Doc format rather than DocX. In the OpenOffice world that translates much better to ODT and then to PDF.

Hope that helps.

0
votes

You may try to use Docxpresso to generate your .odt directly from HTML + CSS code and avoid any conversion issue.

Docxpresso is free for non-commercial use.