wordpress hit counter
Welcome to OpenXML Developer Sign in | Join | Help

Merging / Appending docx files without AfChunk tags

Last post 12-29-2008, 4:03 AM by iloveyoutaiwanmb. 2 replies.
Sort Posts: Previous Next
  •  12-21-2008, 8:10 AM 3959

    Merging / Appending docx files without AfChunk tags

    Hi,

    I have to append docx files as part of my solution and unfortunately the consuming application does not understand AFChunk tags. Hence I have to copy the contents into the body of the destination document.

     Can someone please point me to a code snippet that merges two documents without using altChunk (AfChunk)? I know it involves appending the body contents and then merging styles and other content types. If there's a sample I can refer to, it would be really great!

    Thanks

     

  •  12-22-2008, 6:47 PM 3961 in reply to 3959

    Re: Merging / Appending docx files without AfChunk tags

    Hi,
    There doesn't seem to be any information available on how to accomplish this. I have been waiting for somebody else to come up with a solution.  So this is how far I have been able to proceed.
    My requirement is to merge sets of two word documents, The first are dynamically created, including content controls that I use as place markers for image, rich text and list replacement. This all works just fine and is in production.
    The second document belongs to a logically named set of documents created by running the Open XML PowerTools conversion utility to convert from Word to Word 2007 format across a folder of like-named legacy Word docs. These converted docx files are pretty simple in that they just consist of a body with many paragraph elements.
    I started with the images, thinking that I would be able to add the source images to my target docx file as long as they had original rIds and image URIs. I can merge the images into the target document and can see them in the correct place within the target docx file. But even though the ContentType of the ImagePart is e.g. image/png, the TargetFileExtension is .bin. How do I end up with a target file extension of (as in this case) .png, or for that matter .jpg when the ContentType is image/jpg? Is this a bug in CTP 2, which is what I am using?
    Anybody else been able to progress this further?

           public static void storeNewImages(string destDoc, string sourceDoc)
            {
                List<string[]> ridPairs = new List<string[]>();
                WordprocessingDocument docTarget = WordprocessingDocument.Open(destDoc, true);
                WordprocessingDocument docSource = WordprocessingDocument.Open(sourceDoc, false);
                Package docTargetPackage = docTarget.Package;
                Stream sourceImageStream;
                using (docTarget)
                {
                    using (docSource)
                    {
                        /*
                        * get the images from the additional document and add them to the images
                        * collection of the current output document
                        */
                        var theSourceImages = docSource.MainDocumentPart.ImageParts;
                        MainDocumentPart targetMainDocumentPart = docTarget.MainDocumentPart;
                        //
                        foreach (ImagePart oneSourceImage in theSourceImages)
                        {
                            //
                            // we will need to swap this later
                            //
                            string sourceRid = docSource.MainDocumentPart.GetIdOfPart(oneSourceImage);
                            //
                            // get contents of the source image
                            //
                            string contentType = oneSourceImage.ContentType;
                            sourceImageStream = oneSourceImage.GetStream(FileMode.Open, FileAccess.Read);
                            using (sourceImageStream)
                            {
                                //
                                // create the new image part
                                // Note the null Argument to the rid string
                                // it will create the new Rid automagically
                                //
                                ImagePart newImagePart = targetMainDocumentPart.AddNewPart<ImagePart>(contentType,null);
                                //
                                newImagePart.FeedData(sourceImageStream);
                                //
                                // get the new parts rid
                                //
                                string targetRid = docTarget.MainDocumentPart.GetIdOfPart(newImagePart);
                                //
                                // add the new and old rids to the arraylist
                                //
                                string[] onePair = new string[] { sourceRid, targetRid };
                                ridPairs.Add(onePair);
                            }
                        }

                        var theTargetImages = docTarget.MainDocumentPart.ImageParts;
                        foreach (ImagePart oneTargetImage in theTargetImages)
                        {
                            string targetRid = docTarget.MainDocumentPart.GetIdOfPart(oneTargetImage);
                            //
                            // create a new name for the image
                            //
                            string newRid = oneTargetImage.Uri.ToString();
                           
                        }

                        // doNextStuff(ridPairs, docTarget, docSource);
                        docTarget.MainDocumentPart.Document.Save();

                    }
                }
            }

    // not yet implemented

          private static void doNextStuff(List<string[]> ridPairs, WordprocessingDocument docTarget, WordprocessingDocument docSource)
            {
                //
                // references to both documents body sections
                //
                DocumentFormat.OpenXml.Wordprocessing.Body targetBody = docTarget.MainDocumentPart.Document.Body;
                DocumentFormat.OpenXml.Wordprocessing.Body sourceBody = docSource.MainDocumentPart.Document.Body;
                //
                // get all the paragraphs in the source document
                //
                var sourceParas =
                   sourceBody
                    .Elements<Paragraph>()
                    .ToList<Paragraph>();
                //
                // step through each paragraph
                //

                List<Paragraph> incomingParaGraphs = new List<Paragraph>();

                foreach (var onePara in sourceParas)
                {

                    // Runs
                    //
                    List<Run> paraRuns;
                    int paraRunCount =
                        onePara
                        .Elements<Run>()
                        .Count();

                    if (paraRunCount > 0)
                    {
                        paraRuns =
                            onePara
                            .Elements<Run>()
                            .ToList<Run>();
                        foreach (Run oneRun in paraRuns)
                        {
                            int picCount =
                                oneRun
                                .Elements<Picture>()
                                .Count();
                            if (picCount > 0)
                            {
                                var pictures =
                                    oneRun
                                    .Elements<Picture>()
                                    .ToList<Picture>();
                                foreach (Picture onePic in pictures)
                                {
                                    DocumentFormat.OpenXml.Vml.Shape picShape =
                                        onePic
                                        .Elements<DocumentFormat.OpenXml.Vml.Shape>()
                                        .First();
                                    int imageCount =
                                        picShape
                                        .Elements<DocumentFormat.OpenXml.Vml.ImageData>()
                                        .Count();
                                    if (imageCount > 0)
                                    {
                                        DocumentFormat.OpenXml.Vml.ImageData picImage =
                                            picShape
                                            .Elements<DocumentFormat.OpenXml.Vml.ImageData>()
                                            .First();

                                        string relToChange = picImage.RelationshipId;
                                        foreach (string[] ridPair in ridPairs)
                                        {
                                            if (ridPair[0].ToString() == relToChange)
                                            {
                                                picImage.RelationshipId = ridPair[1].ToString();
                                                break;
                                            }
                                        }
                                    }

                                }
                            }
                        }
                    }
                    //
                    // Properties
                    //
                    List<ParagraphProperties> paraProps;
                    int paraPropCount =
                        onePara
                        .Elements<ParagraphProperties>()
                        .Count();
                    if (paraPropCount > 0)
                    {
                        paraProps =
                            onePara
                            .Elements<ParagraphProperties>()
                            .ToList<ParagraphProperties>();
                    }

                    Paragraph newPara = new Paragraph(onePara.OuterXml);

                    incomingParaGraphs.Add(newPara);
                }

                DocumentFormat.OpenXml.Wordprocessing.Table newTable = new DocumentFormat.OpenXml.Wordprocessing.Table();
                foreach (var extraPara in incomingParaGraphs)
                {
                    newTable.AppendChild<Paragraph>(extraPara);
                }

                if (targetBody.LastChild is SectionProperties)
                {
                    // The last paragraph should before the section properties if there is section properties.
                    targetBody.InsertBefore(newTable, targetBody.LastChild);
                }
                else
                {
                    targetBody.AppendChild(newTable);
                }
            }
        }


  •  12-29-2008, 4:03 AM 3968 in reply to 3959

    Re: Merging / Appending docx files without AfChunk tags

    Hi, All,
    I've tried to merge two Word documents without AltChunk. With SDK 2.0, the application is able to merge two document bodies if they are not too complex. My next step is to build the TOC programmatically. I've also tried the AltChunk version, and I am still comparing the intricate differences.
    Actually, I'm new to .NET and Open XML, so the code is rather dummy...... :-(
    ------------------------------------------------------------------------

    Imports <xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
    Imports <xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">

    Partial Public Class _Default

        Inherits System.Web.UI.Page
        Const HyperLinkRelationshipType As String = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
        ' Handles the page's Load event; currently performs no work.
        Protected Sub Page_Load(ByVal sender As Object, ByVal e As System.EventArgs) Handles Me.Load
        End Sub

        Protected Function RIdExistsInMainDocumentPart(ByRef wd As WordprocessingDocument, ByVal rId As String) As Boolean
            Dim mdp As MainDocumentPart = wd.MainDocumentPart

            'For Each p In wdps
            '    If p.RelationshipId = rId Then
            '        Response.Write(p.RelationshipId & "</br>")
            '        Return True
            '    End If
            'Next

            'Dim IdPartPairs As IEnumerable(Of IdPartPair) = mdp.Parts
            'For Each p In IdPartPairs
            '    If p.RelationshipId = rId Then
            '        Response.Write(p.RelationshipId & "</br>")
            '        Return True
            '    End If
            'Next

            Dim mdpER As IEnumerable(Of ExternalRelationship)
            If mdp.ExternalRelationships.Count <> 0 Then
                mdpER = mdp.ExternalRelationships
                For Each ER In mdpER
                    If ER.RelationshipType = HyperLinkRelationshipType Then
                        'Response.Write(ER.Id & "   " & ER.Uri.ToString() & " repeats</br>")
                        If ER.Id = rId Then
                            Response.Write(ER.Id & "   " & ER.Uri.ToString() & " repeats</br>")
                            Return True
                        End If
                    End If
                Next
            End If
            Return False
        End Function

        Protected Function StyleIdExistsInMainDocumentPart(ByRef wd As WordprocessingDocument, ByVal sId As String) As Boolean
            Dim mdp As MainDocumentPart = wd.MainDocumentPart
            Dim mdpStyleID As IEnumerable(Of DocumentFormat.OpenXml.Wordprocessing.StyleId)
            If mdp.StyleDefinitionsPart.Styles.Elements.Count() <> 0 Then
                mdpStyleID = mdp.StyleDefinitionsPart.Styles.Elements(Of DocumentFormat.OpenXml.Wordprocessing.StyleId)()
                For Each StyldID In mdpStyleID
                    'Response.Write(ER.Id & "   " & ER.Uri.ToString() & " repeats</br>")
                    If StyldID.Val.ToString() = sId Then
                        Response.Write(StyldID.Val.ToString() & " repeats</br>")
                        Return True
                    End If

                Next
            End If
            Return False
        End Function

        Protected Function ChangeExternalRelationshipId(ByRef wd As WordprocessingDocument, ByVal er As ExternalRelationship) As ExternalRelationship
            Dim oldER As ExternalRelationship = er
            Response.Write("Will delete Old ID:" & oldER.Id & "</br>")
            wd.MainDocumentPart.DeleteExternalRelationship(er)
            er = wd.MainDocumentPart.AddExternalRelationship(HyperLinkRelationshipType, oldER.Uri)
            Response.Write("Generate new ID:" & er.Id & "</br>")

            Dim hlink As IEnumerable(Of DocumentFormat.OpenXml.Wordprocessing.Hyperlink) = _
                From h In wd.MainDocumentPart.Document.Body.Descendants(Of DocumentFormat.OpenXml.Wordprocessing.Hyperlink)() _
            Where (h.Id = oldER.Id)
            If hlink.Count = 0 Then
                Response.Write("Error:ID not Found: " & oldER.Id & "</br>")
            Else
                hlink.ElementAtOrDefault(0).IdValue = er.Id
                Response.Write("New ID: " & er.Id & "</br>")
            End If

            'Modify the conflicting rId to the new value ...
            ChangeExternalRelationshipId = er
        End Function

        Protected Sub ChangeImagePartId(ByVal wd As WordprocessingDocument, ByVal ipr As ModifiedImagePartRecord)
            Dim em As New StringValue(ipr.newID)
            Dim blips As IEnumerable(Of DocumentFormat.OpenXml.Drawing.Blip) = _
                From blip In wd.MainDocumentPart.Document.Body.Descendants(Of DocumentFormat.OpenXml.Drawing.Blip)() _
                Where (blip.Embed.Value = ipr.oldID)
            'Don't use blips.First.Embed = em. Because it would cause blips to be NOTHING
            'Why? Just don't understand. Maybe it's relevant to Type.IsGenericType.
            Dim tempblip As DocumentFormat.OpenXml.Drawing.Blip = New DocumentFormat.OpenXml.Drawing.Blip()
            tempblip = blips.First
            tempblip.Embed = em
        End Sub

        Structure ModifiedExternalRelationshipRecord
            Dim oldER As ExternalRelationship
            Dim newER As ExternalRelationship
            Dim modified As Boolean
        End Structure

        Structure ModifiedImagePartRecord
            Dim oldIP As ImagePart
            Dim oldID As String
            Dim newID As String
            Dim modified As Boolean
        End Structure

        Protected Sub MergeWordDocument()
            Dim wordDoc1 As WordprocessingDocument = WordprocessingDocument.Open(TextBox1.Text, True)
            Dim mainDocPart1 As MainDocumentPart = wordDoc1.MainDocumentPart
            Dim mainDocPartReader1 As OpenXmlReader = OpenXmlReader.Create(mainDocPart1.GetStream())
            'Dim styleDoc1 As StyleDefinitionsPart = wordDoc1.GetPartsOfType(Of StyleDefinitionsPart)()

            Dim wordDoc2 As WordprocessingDocument = WordprocessingDocument.Open(TextBox2.Text, True)
            Dim mainDocPart2 As MainDocumentPart = wordDoc2.MainDocumentPart
            Dim mainDocPartReader2 As OpenXmlReader = OpenXmlReader.Create(mainDocPart2.GetStream())

            Dim ModifiedExternalRelationshipList As New ArrayList

            If mainDocPart2.ExternalRelationships.Count <> 0 Then
                For Each ExtR In mainDocPart2.ExternalRelationships
                    If ExtR.RelationshipType = HyperLinkRelationshipType Then
                        Dim tempRecord As ModifiedExternalRelationshipRecord = New ModifiedExternalRelationshipRecord
                        If RIdExistsInMainDocumentPart(wordDoc1, ExtR.Id) Then
                            tempRecord.oldER = ExtR
                            ModifiedExternalRelationshipList.Add(tempRecord)
                        Else
                            mainDocPart1.AddExternalRelationship(HyperLinkRelationshipType, ExtR.Uri, ExtR.Id)
                        End If
                    End If
                Next
                If ModifiedExternalRelationshipList IsNot Nothing Then
                    For Each er In ModifiedExternalRelationshipList
                        er.newER = ChangeExternalRelationshipId(wordDoc2, er.oldER)
                        mainDocPart1.AddExternalRelationship(HyperLinkRelationshipType, er.newER.Uri, er.newER.Id)
                    Next
                End If
            End If
            'Check images in wordDoc2
            Dim ModifiedImagePartList As New ArrayList
            If wordDoc2.MainDocumentPart.ImageParts.Count <> 0 Then
                'TODO ASAP 12/25 1850
                For Each oldip In wordDoc2.MainDocumentPart.ImageParts

                    Dim oldid As String = wordDoc2.MainDocumentPart.GetIdOfPart(oldip)
                    Dim newip As ImagePart = wordDoc1.MainDocumentPart.AddPart(Of ImagePart)(oldip)
                    Dim newid As String = wordDoc1.MainDocumentPart.GetIdOfPart(newip)
                    Dim temprecord As New ModifiedImagePartRecord
                    With temprecord
                        .oldIP = oldip
                        .newID = newid
                        .oldID = oldid
                    End With
                    'temprecord.oldIP = oldip
                    'temprecord.newID = newid
                    'temprecord.oldID = oldid
                    ModifiedImagePartList.Add(temprecord)
                    Response.Write(oldid & "</br>")
                    Response.Write(newid & "</br>")
                Next
                If ModifiedImagePartList IsNot Nothing Then
                    For Each ip In ModifiedImagePartList
                        ChangeImagePartId(wordDoc2, ip)

                    Next
                End If
            End If



            Dim Count1 As Integer = wordDoc1.MainDocumentPart.Document.Body.Elements.Count()
            Dim Count2 As Integer = wordDoc2.MainDocumentPart.Document.Body.Elements.Count()

            Dim LastP As Paragraph = wordDoc1.MainDocumentPart.Document.Body.Elements(Of Paragraph).Last()
            Dim LastS As SectionProperties = wordDoc1.MainDocumentPart.Document.Body.Elements(Of SectionProperties).Last()
            If LastP.NextSibling.Equals(LastS) Then
                LastP.NextSibling.Remove()

                If LastP.Elements(Of ParagraphProperties).Count() = 0 Then
                    LastP.PrependChild(Of ParagraphProperties)(New ParagraphProperties())
                End If

                Dim pPr As ParagraphProperties = LastP.Elements(Of ParagraphProperties).First()
                pPr.SectionProperties = LastS
                'Append the tail of wordDoc1 one by one.
                'By p.OuterXml(), no need to shorten the Body of wordDoc2.
                'ToDo: StyleId checks, maybe similar to that in ExternalRelationship
                For Each p In wordDoc2.MainDocumentPart.Document.Body.Elements()
                    Select Case p.LocalName()
                        Case "p"
                            'Response.Write("w:p" & "</br>")
                            Dim temptree As Paragraph = New Paragraph(p.OuterXml())
                            If temptree.Elements(Of ParagraphProperties).Count = 0 Then
                            Else
                                Dim styleID As DocumentFormat.OpenXml.Wordprocessing.ParagraphStyleId = temptree.Elements(Of ParagraphProperties).FirstOrDefault.ParagraphStyleId
                                If styleID IsNot Nothing Then
                                    Response.Write("ParagraphStyleID =" & styleID.Val.ToString() & " </br>")
                                    If StyleIdExistsInMainDocumentPart(wordDoc1, styleID.Val) Then
                                    Else
                                        Dim style As IEnumerable(Of DocumentFormat.OpenXml.Wordprocessing.Style) = _
                                            From s In wordDoc2.MainDocumentPart.StyleDefinitionsPart.Styles.Elements(Of DocumentFormat.OpenXml.Wordprocessing.Style)() _
                                            Where (s.StyleId.Value = styleID.Val)
                                        If style IsNot Nothing Then
                                            Dim tempstyle As DocumentFormat.OpenXml.Wordprocessing.Style = New DocumentFormat.OpenXml.Wordprocessing.Style(style.FirstOrDefault.OuterXml())
                                            wordDoc1.MainDocumentPart.StyleDefinitionsPart.Styles.AppendChild(tempstyle)
                                            tempstyle = Nothing
                                        End If
                                    End If
                                End If
                            End If
                            wordDoc1.MainDocumentPart.Document.Body.Append(temptree)
                            temptree = Nothing
                        Case "tbl"
                            'Response.Write("w:tbl" & "</br>")
                            Dim temptree As DocumentFormat.OpenXml.Wordprocessing.Table = New DocumentFormat.OpenXml.Wordprocessing.Table(p.OuterXml())
                            If temptree.Elements(Of DocumentFormat.OpenXml.Wordprocessing.TableProperties).Count = 0 Then
                            Else
                                Dim styleID As DocumentFormat.OpenXml.Wordprocessing.TableStyleId = temptree.Elements(Of DocumentFormat.OpenXml.Wordprocessing.TableProperties).FirstOrDefault.TableStyleId
                                If styleID IsNot Nothing Then
                                    Response.Write("TableStyleID =" & styleID.Val.ToString() & " </br>")
                                    If StyleIdExistsInMainDocumentPart(wordDoc1, styleID.Val) Then
                                    Else
                                        Dim style As IEnumerable(Of DocumentFormat.OpenXml.Wordprocessing.Style) = _
                                            From s In wordDoc2.MainDocumentPart.StyleDefinitionsPart.Styles.Elements(Of DocumentFormat.OpenXml.Wordprocessing.Style)() _
                                            Where (s.StyleId.ToString() = styleID.Val.ToString())
                                        Dim tempstyle As DocumentFormat.OpenXml.Wordprocessing.Style = New DocumentFormat.OpenXml.Wordprocessing.Style(style.FirstOrDefault.OuterXml())
                                        wordDoc1.MainDocumentPart.StyleDefinitionsPart.Styles.AppendChild(tempstyle)
                                        tempstyle = Nothing
                                    End If
                                End If
                            End If
                            wordDoc1.MainDocumentPart.Document.Body.Append(temptree)
                            temptree = Nothing

                            'sectPr must be the last unit before the end of Body. by MSDN
                        Case "sectPr"
                            Response.Write("w:sectPr" & "</br>")
                            Dim temptree As SectionProperties = New SectionProperties(p.OuterXml())
                            wordDoc1.MainDocumentPart.Document.Body.Append(temptree)
                            temptree = Nothing
                    End Select

                Next
            End If

            'wordDoc2.MainDocumentPart.Document.Save(mainDocPart2.GetStream())
            'mainDocPart2.Document.Save()
            wordDoc2.Close()


            wordDoc1.MainDocumentPart.StyleDefinitionsPart.Styles.Save()
            wordDoc1.MainDocumentPart.Document.Save(mainDocPart1.GetStream())
            mainDocPart1.Document.Save()
            wordDoc1.Close()

        End Sub
        Protected Function MergeByAltChunk() As Boolean
            Dim wordDoc1 As WordprocessingDocument = WordprocessingDocument.Open(TextBox1.Text, True)
            Dim mainDocPart1 As MainDocumentPart = wordDoc1.MainDocumentPart
            'Dim wordDoc2 As WordprocessingDocument = WordprocessingDocument.Open(TextBox2.Text, True)
            'Dim mainDocPart2 As MainDocumentPart = wordDoc2.MainDocumentPart
            Dim fileStream As FileStream = File.Open(TextBox2.Text, FileMode.Open)

            'AltChunk embeds a document into the other document. Not just copy-and-paste!
            Dim altChunkID As String = New String("AltChunkId1")
            Dim chunk As AlternativeFormatImportPart = mainDocPart1.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.WordprocessingML, altChunkID)
            chunk.FeedData(fileStream)
            Dim altChunk As AltChunk = New AltChunk()
            altChunk.Id = altChunkID
            mainDocPart1.Document.Body.InsertAfter(altChunk, mainDocPart1.Document.Body.Elements(Of Paragraph)().Last())
            mainDocPart1.Document.Save()
            wordDoc1.Close()
            fileStream.Close()
        End Function
        ' Click handler for Button1: runs the merge that copies body content.
        Protected Sub Button1_Click(ByVal sender As Object, ByVal e As EventArgs) Handles Button1.Click
            MergeWordDocument()
            ' Alternative merge via altChunk, currently disabled:
            'MergeByAltChunk()
        End Sub
    End Class
View as RSS news feed in XML