In the previous post, I talked about serializing objects. Well… there are times when two serialized objects of the same class need to be compared for differences, for example:
Before | After | Result |
<?xml version="1.0" encoding="utf-8"?> <Object> <PrimaryKey> <![CDATA[123]]> </PrimaryKey> <Data1> <![CDATA[sOme thing]]> </Data1> <Data2> <![CDATA[sAme thing]]> </Data2> <SavedInformation> <![CDATA[Do not compare]]> </SavedInformation> </Object> | <?xml version="1.0" encoding="utf-8"?> <Object> <PrimaryKey> <![CDATA[123]]> </PrimaryKey> <Data1> <![CDATA[NO thing]]> </Data1> <Data2> <![CDATA[sAme thing]]> </Data2> <SavedInformation> <![CDATA[Do not compare]]> </SavedInformation> </Object> | <?xml version="1.0" encoding="utf-8"?> <Object> <PrimaryKey> <![CDATA[123]]> </PrimaryKey> <Data1> <before> <![CDATA[sOme thing]]> </before> <after> <![CDATA[NO thing]]> </after> </Data1> <SavedInformation> <![CDATA[Do not compare]]> </SavedInformation> </Object> |
Well I have just the class that’ll do this.
The rules are simple:
- Recursive: The class will recursively search every node for differences
- PrimaryKey (changeable): If primary key is available, it’ll be used to justify whether 2 nodes are comparable.
- If PrimaryKey is NOT available, the unidentified node will be alphanumerically ordered using a stable sort, then all nodes will be compared based on their position.
- PrimaryKey is always kept.
- SavedInformation (changeable) is always kept and ignored.
- Leaf nodes that changed are always kept and will feature two additional sub nodes: the “before” and “after” nodes.
- Parent of leaf nodes that changed are always kept.
- If the before state is a leaf node, but the after state is a container node, then the comparison, before and after will be tacked on along the shortest path, in this case, the leaf node.
So here’s how you can do that:
using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Xml; namespace XMLSerializerHelperClasses { /// <summary> /// Pair up XMLNodes, one with another if they're comparable. /// </summary> internal class XmlNodePair { internal XmlNode Node1; internal XmlNode Node2; } /// <summary> /// Finds the difference of 2 XML Serialized objects. /// Fields/Nodes are comparable only when the Join_Key of the node is identical (just like in the database, the same identity key that represent 2 different values indicates an update). /// When join_key is new, or gone, this is indicative of an insert or a delete. /// </summary> public class Difference { //The key to indicate that two objects are comparable. If the key differs then the object would be assumed to be recently created/deleted. public static string JOIN_KEY = "PrimaryKey"; //Indicates that this information needs to be stored whether or not it changed. Ideally this is some information that may ease human read-ability. public static string REQUIRED_NODE_NAME = "_UserIdentifier"; /// <summary> /// Finds the difference between two xmldocuments in string format. /// The result returned is an xml document with the old and new values wrapped inside <before></before> and <after></after> nodes. /// All parent nodes that contains the difference is preserved, along with the difference. /// Any other node (that did not change) is removed to simplify the resulting XML. /// </summary> /// <param name="Doc1String"></param> /// <param name="Doc2String"></param> /// <returns></returns> public static XmlDocument FindDocumentDifference(string Doc1String, string Doc2String) { XmlDocument doc1 = new XmlDocument(), doc2 = new XmlDocument(); //Sometimes the string has a funny character and can't be loaded to xml document, we want to get rid of that character here. //The character is needed for deserialization, but since the xml difference will never be deserialized, this should be the best place to eliminate it. try { doc1.LoadXml(Doc1String); } catch { doc1.LoadXml(Doc1String.Substring(1)); } try { doc2.LoadXml(Doc2String); } catch { doc2.LoadXml(Doc2String.Substring(1)); } return FindDocumentDifference(doc1, doc2); } /// <summary> /// Finds the difference between 2 xml documents. /// </summary> /// <param name="Doc1"></param> /// <param name="Doc2"></param> /// <returns></returns> public static XmlDocument FindDocumentDifference(XmlDocument Doc1, XmlDocument Doc2) { XmlDocument doc = new XmlDocument(); doc.AppendChild(doc.CreateXmlDeclaration("1.0", "utf-16", null)); XmlNode Differences = FindNodeDifference(Doc1.SelectSingleNode("/*").PutPrimaryKeyFirst(), Doc2.SelectSingleNode("/*").PutPrimaryKeyFirst(), doc); if (Differences != null) { doc.AppendChild(Differences); return doc; } //We want to return null to indicate that there's no difference, such that it's easier to check on the database side, rather than having to inspect the first child. return null; } /// <summary> /// Creates an xml node containing all the differences between node1 and node2. /// When assembled this will create another xml document containing only the differences. /// NOTE: Node1 and Node2, must be of the SAME TYPE! /// NOTE: Node1 and Node2, must have the SAME PRIMARY KEY (IF EXISTS)! /// </summary> /// <param name="Node1"></param> /// <param name="Node2"></param> /// <returns></returns> public static XmlNode FindNodeDifference(XmlNode Node1, XmlNode Node2, XmlDocument doc) { XmlNode Before = null, After = null, Difference; try { Difference = doc.CreateNode(XmlNodeType.Element, Node1.Name, null); } catch { try { Difference = doc.CreateNode(XmlNodeType.Element, Node2.Name, null); } catch { Difference = doc.CreateNode(XmlNodeType.Element, "UNKNOWN", null); } } //There must be a text in between. if (Node1 == null) Node1.InnerText = "NULL"; if (Node2 == null) Node2.InnerText = "NULL"; //The most basic case, when both nodes are text. if (!IsContainer(Node1) || !IsContainer(Node2)) { if (Node1.FirstChild == null || Node2.FirstChild == null || Node1.FirstChild.Name == "#text" || Node2.FirstChild.Name == "#text") { //No difference. if (Node1.FirstChild != null && Node2.FirstChild != null && Node1.FirstChild.Value == Node2.FirstChild.Value) { if (Difference.Name == REQUIRED_NODE_NAME) //Both name and content of the node must be the same, otherwise it'll show up as before and after. { //There's no difference here, but we want to save it anyways, because this is an identifying information that's friendlier to the user. Difference.InnerText = Node1.FirstChild.Value; } else { return null; } //Found difference. } //The both nodes are of type <XXX/>, then the difference is nothing. else if (Node1.FirstChild == null && Node2.FirstChild == null) { return null; } else { Before = doc.CreateNode(XmlNodeType.Element, "before", null); After = doc.CreateNode(XmlNodeType.Element, "after", null); Before.InnerXml = Node1.InnerXml; After.InnerXml = Node2.InnerXml; Difference.AppendChild(Before); Difference.AppendChild(After); } } } //If the nodes are containers else if (IsContainer(Node1) && IsContainer(Node2)) { Queue<XmlNodePair> ComparisonQueue = PairUpMatchingContainers(Node1, Node2, doc); XmlNodePair DifferencePair; XmlNode PrimaryKeyNode = doc.CreateNode(XmlNodeType.Element, JOIN_KEY, null); //Because Node1 and Node2, should have the same primary key, so it really doesn't matter. try { PrimaryKeyNode.InnerText = Node1.GetNode(JOIN_KEY).InnerText; } catch { try { PrimaryKeyNode.InnerText = Node2.GetNode(JOIN_KEY).InnerText; } catch { PrimaryKeyNode.InnerText = "ANON"; } } while (ComparisonQueue.Count != 0) { DifferencePair = ComparisonQueue.Dequeue(); //Add the difference, if it's not null. try { Difference.AppendChild(FindNodeDifference(DifferencePair.Node1, DifferencePair.Node2, doc)); //Preserves the primary key, place it to the beginning of the list. Difference.InsertBefore(PrimaryKeyNode, Difference.FirstChild); } //If null, adds nothing. catch { } } //If there's a difference it should have before and after nodes, making it a container. //If all of the children of this node, are the same, and hence no difference, return null. if (!IsContainer(Difference)) return null; } //If there's a difference return it. return Difference; } /// <summary> /// The purpose of this to pair up child elements of the two nodes, that has the same id. /// If no ID found, then pair them up based on their type and general position. /// The ID should be found by the grand child element relative to node1, that is named JOIN_KEY. /// This pairing will work for one level ONLY! It'll not explore sub-nodes. /// </summary> /// <param name="Node1"></param> /// <param name="Node2"></param> /// <returns></returns> private static Queue<XmlNodePair> PairUpMatchingContainers(XmlNode Node1, XmlNode Node2, XmlDocument doc) { Queue<XmlNodePair> ProcessingOrder = new Queue<XmlNodePair>(); Dictionary<string, XmlNodePair> Buckets = new Dictionary<string, XmlNodePair>(); Dictionary<string, int> BucketedCounter; int Counter = -1; //Go through the children of the first node. foreach (XmlNode Child in Node1.ChildNodes) { //Figure out how many times we have seen this same node appear in the xml. We need this in order to do comparison where primary key is not given. BucketedCounter = new Dictionary<string, int>(); try { BucketedCounter[Child.Name] = BucketedCounter[Child.Name] + 1; } catch { BucketedCounter[Child.Name] = 0; } Counter = BucketedCounter[Child.Name]; //This child is not of collection type, continue. if (Child.Name == "#text") continue; string NodeKey = ""; //If the node has a primary key, use this key to generate a dictionary key. if (Child.GetNode(JOIN_KEY) != null) { //Creates the nodekey from the Primary Key NodeKey = Child.Name + ":" + Child.GetNode(JOIN_KEY).InnerText; } //If the node does not have the primary key else { //The nodekey will be based on the the position and the name of the node. NodeKey = Child.Name + ":Count" + Counter; //Notice the ":Count" this is to make sure that counter are always seperate from those with real primary key, in case the primary key is the same as the counter. } //Creates a new node. XmlNodePair temp = new XmlNodePair(); //The first node is the child temp.Node1 = Child; //The second node should be the one from the dictionary, if it exists. try { temp.Node2 = Buckets[NodeKey].Node2; } //If not copy the info from node 1, and set the inner text to null. - This means the node exists in Node1 but not in Node2 catch { temp.Node2 = doc.CreateNode(XmlNodeType.Element, Child.Name, Node1.NamespaceURI); temp.Node2.InnerText = "NULL"; } //Adds to the dictionary. Buckets[NodeKey] = temp; } //Reset the counter. Counter = 0; //Go through the children of the second node. foreach (XmlNode Child in Node2.ChildNodes) { //Figure out how many times we have seen this same node appear in the xml. We need this in order to do comparison where primary key is not given. BucketedCounter = new Dictionary<string, int>(); try { BucketedCounter[Child.Name] = BucketedCounter[Child.Name] + 1; } catch { BucketedCounter[Child.Name] = 0; } Counter = BucketedCounter[Child.Name]; //This child is not of collection type, continue. if (Child.Name == "#text") continue; string NodeKey = ""; //If the node has a primary key, use this key to generate a dictionary key. if (Child.GetNode(JOIN_KEY) != null) { //Creates the nodekey from the Primary Key NodeKey = Child.Name + ":" + Child.GetNode(JOIN_KEY).InnerText; } //If the node does not have the primary key else { //The nodekey will be based on the the position and the name of the node. NodeKey = Child.Name + ":Count" + Counter; //Notice the ":Count" this is to make sure that counter are always seperate from those with real primary key, in case the primary key is the same as the counter. } //Creates a new node. XmlNodePair temp = new XmlNodePair(); //The first node is the child temp.Node2 = Child; //The FIRST node should be the one from the dictionary, if it exists. try { temp.Node1 = Buckets[NodeKey].Node1; } //If not copy the info from node 2, and set the inner text to null. - This means the node exists in Node1 but not in Node2 catch { temp.Node1 = doc.CreateNode(XmlNodeType.Element, Child.Name, Node1.NamespaceURI); temp.Node1.InnerText = "NULL"; } //Adds to the dictionary. Buckets[NodeKey] = temp; } //Puts the dictionary into the queue. foreach (string DictionaryKey in Buckets.Keys) { ProcessingOrder.Enqueue(Buckets[DictionaryKey]); } return ProcessingOrder; } /// <summary> /// Checks if an xml node is a container. /// A container is a node that contains any other node types besides text. /// </summary> /// <param name="Node">The node to be checked.</param> /// <returns>True if it's a container, false otherwise.</returns> public static bool IsContainer(XmlNode Node) { bool IsContainer = false; foreach (XmlNode TestNode in Node.ChildNodes) { if (TestNode.NodeType != XmlNodeType.Text) { IsContainer = true; break; } } return IsContainer; } } }