Thursday, 17 October 2013

How to read excel data into datatable using open xml

Recently i got a task to read data from an excel sheet into a datatable using open XML. i have found an interesting article in the net which am posting here. Hope this will help you

I have modified a little bit of it for the better performance which it can read DateTime values from the sheet also.

public class ReadExcelDataUsingOpenXML
    {
        public static SpreadsheetDocument CurrentDocument { get; private set; }
        public static string CurrentFilename { get; private set; }
        public static WorksheetPart CurrentSheet { get; set; }
        public static int? ColumnCount;

        private static readonly string[] ColumnNames = { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z" };

        private static readonly List<string> ColumnNameList = new List<string>();

        public static bool OpenDocument(string fileName, bool editable)
        {
            DataTable dt = new DataTable();
            if (!File.Exists(fileName)) return false;
            SpreadsheetDocument spreadSheet = SpreadsheetDocument.Open(fileName, editable);
            CurrentDocument = spreadSheet;
            CurrentFilename = fileName;
            return true;
        }

        public static bool OpenSheet(string sheetName)
        {
            if (CurrentDocument == null) return false;
            IEnumerable<Sheet> sheets = CurrentDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>().Where(s => s.Name == sheetName);
            if (sheets.Count() == 0) return false;
            string relationshipId = sheets.First().Id.Value;
            CurrentSheet = (WorksheetPart)CurrentDocument.WorkbookPart.GetPartById(relationshipId);
            return true;
        }

        public static bool ReadDocument(ref DataTable result)
        {
            var dataResult = new DataTable();
            if (!ExecuteReadDocument(ref dataResult)) return false;
            result = dataResult;
            return true;
        }

        public static string GetColumnName(int colIndex)
        {
            if (colIndex < 0) return "#";
            if (ColumnNameList.Count <= colIndex)
            {
                for (int index = ColumnNameList.Count; index < (colIndex + 1); index++)
                {
                    string colName;
                    if (index >= ColumnNames.Length)
                    {
                        var subIndex = (int)Math.Floor((double)index / ColumnNames.Length) - 1;
                        int sufIndex = (index - ((subIndex + 1) * ColumnNames.Length));
                        colName = GetColumnName(subIndex) + GetColumnName(sufIndex);
                    }
                    else colName = ColumnNames[index];
                    ColumnNameList.Add(colName);
                }
            }
            return ColumnNameList[colIndex];
        }

        private static bool ExecuteReadDocument(ref DataTable result)
        {
            if (CurrentSheet == null) throw new Exception("No sheet selected");
            var stringTableList = GetSharedStringPart().SharedStringTable.ChildElements.ToList();
            var lastRow = CurrentSheet.Worksheet.Descendants<Row>().LastOrDefault();
            if (lastRow == null) return false;
            var allRows = CurrentSheet.Worksheet.Descendants<Row>().ToList();
            for (var rowIndex = 1; rowIndex <= lastRow.RowIndex; rowIndex++)
            {
                var cellList = new List<string>();
                var rowIndexVlaues = (from rows in allRows where rows.RowIndex.Value == rowIndex select rows);
                var cellValues = (from c in rowIndexVlaues.FirstOrDefault().Descendants<Cell>() where c.CellValue != null select c).ToList();
                if (ColumnCount == null)
                {
                    ColumnCount = cellValues.Count;
                }
                for (var cellIndex = 0; cellIndex <= ColumnCount; cellIndex++)
                {
                    var colName = GetColumnName(cellIndex);
                    string strValue = null;

                    var cell = (from c in cellValues where c.CellReference.Value.Equals(colName + rowIndex, StringComparison.InvariantCultureIgnoreCase) select c).FirstOrDefault();
                    strValue = XLGetCellValue(cell, CurrentDocument.WorkbookPart);

                    if (cellIndex == 3 && rowIndex > 1)
                    {
                        if (strValue != null)
                        {
                            DateTime dtValue = FromExcelSerialDate(double.Parse(strValue));
                            strValue = dtValue.ToString();
                        }
                        else
                        {
                            strValue = null;
                        }
                    }
                    cellList.Add(strValue);
                }

                string[] rowData = cellList.ToArray();
                while (result.Columns.Count < rowData.Length)
                {
                    result.Columns.Add(GetColumnName(result.Columns.Count));
                }
                result.Rows.Add(rowData);
            }
            return true;
        }

        private static string XLGetCellValue(Cell c, WorkbookPart wbPart)
        {
            string value = null;
            if (c != null && c.CellValue != null)
            {
                value = c.CellValue.InnerText;


                if (c.DataType != null)
                {
                    switch (c.DataType.Value)
                    {
                        case CellValues.SharedString:
                            var stringTable = wbPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();

                            if (stringTable != null)
                            {
                                value = stringTable.SharedStringTable.ElementAt(int.Parse(value)).InnerText;
                            }
                            break;
                        case CellValues.Boolean:
                            switch (value)
                            {
                                case "0":
                                    value = "FALSE";
                                    break;
                                default:
                                    value = "TRUE";
                                    break;
                            }
                            break;
                    }
                }
            }

            return value;
        }

        private static SharedStringTablePart GetSharedStringPart()
        {
            return CurrentDocument.WorkbookPart.SharedStringTablePart;
        }

        private static DateTime FromExcelSerialDate(double SerialDate)
        {
            if (SerialDate > 59) SerialDate -= 1;
            return new DateTime(1899, 12, 31).AddDays(SerialDate);
        }

        public static void CreateSpreadsheetWorkbook(string filepath, DataTable dt)
        {
            FileInfo f = new FileInfo(filepath);
            if (f.Exists)
                f.Delete();

            SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Create(filepath, SpreadsheetDocumentType.Workbook);

            WorkbookPart workbookpart = spreadsheetDocument.AddWorkbookPart();
            workbookpart.Workbook = new Workbook();

            WorksheetPart worksheetPart = workbookpart.AddNewPart<WorksheetPart>();
            worksheetPart.Worksheet = new Worksheet(new SheetData());

            Sheets sheets = spreadsheetDocument.WorkbookPart.Workbook.AppendChild<Sheets>(new Sheets());

            Sheet sheet = new Sheet() { Id = spreadsheetDocument.WorkbookPart.GetIdOfPart(worksheetPart), SheetId = 1, Name = "Sheet1" };
            sheets.Append(sheet);
            uint row = 1;
            int index;
            Cell cell;
            foreach (DataRow dr in dt.Rows)
            {
                for (int idx = 0; idx < dt.Columns.Count; idx++)
                {
                    SharedStringTablePart shareStringPart;
                    if (spreadsheetDocument.WorkbookPart.GetPartsOfType<SharedStringTablePart>().Count() > 0)
                    {
                        shareStringPart = spreadsheetDocument.WorkbookPart.GetPartsOfType<SharedStringTablePart>().First();
                    }
                    else
                    {
                        shareStringPart = spreadsheetDocument.WorkbookPart.AddNewPart<SharedStringTablePart>();
                    }

                    index = InsertSharedStringItem(Convert.ToString(dr[idx]), shareStringPart);
                    cell = InsertCellInWorksheet(dt.Columns[idx].ColumnName, row, worksheetPart);
                    cell.CellValue = new CellValue(index.ToString());
                    cell.DataType = new EnumValue<CellValues>(CellValues.SharedString);
                }
                row++;
            }
            workbookpart.Workbook.Save();
            spreadsheetDocument.Close();
        }

        private static int InsertSharedStringItem(string text, SharedStringTablePart shareStringPart)
        {
            if (shareStringPart.SharedStringTable == null)
            {
                shareStringPart.SharedStringTable = new SharedStringTable();
            }

            int i = 0;

            foreach (SharedStringItem item in shareStringPart.SharedStringTable.Elements<SharedStringItem>())
            {
                if (item.InnerText == text)
                {
                    return i;
                }

                i++;
            }
            shareStringPart.SharedStringTable.AppendChild(new SharedStringItem(new DocumentFormat.OpenXml.Spreadsheet.Text(text)));
            shareStringPart.SharedStringTable.Save();
            return i;
        }

        private static Cell InsertCellInWorksheet(string columnName, uint rowIndex, WorksheetPart worksheetPart)
        {
            Worksheet worksheet = worksheetPart.Worksheet;
            SheetData sheetData = worksheet.GetFirstChild<SheetData>();
            string cellReference = columnName + rowIndex;
            Row row;

            if (sheetData.Elements<Row>().Where(r => r.RowIndex == rowIndex).Count() != 0)
            {
                row = sheetData.Elements<Row>().Where(r => r.RowIndex == rowIndex).First();
            }
            else
            {
                row = new Row() { RowIndex = rowIndex };
                sheetData.Append(row);
            }

            if (row.Elements<Cell>().Where(c => c.CellReference.Value == columnName + rowIndex).Count() > 0)
            {
                return row.Elements<Cell>().Where(c => c.CellReference.Value == cellReference).First();
            }
            else
            {
                Cell refCell = null;
                Cell newCell = new Cell() { CellReference = cellReference };
                row.InsertBefore(newCell, refCell);
                worksheet.Save();
                return newCell;
            }
        }

    }

No comments:

Post a Comment