Wednesday 28 November 2012

Game of Life


Here is one small attempt to write a program that runs Conway's Game Of Life. 
It is a zero player game which does not require any user input once it has been started with 
an initial configuration. It is basically an infinite 2-D array of cells, each cell having 
state of either being alive (1) or being dead (0). At each step (tick of time), state of 
cell changes based on 4 simple rules (referred from aforementioned wikipedia article) -
  • Any live cell with fewer than two live neighbours dies, as if caused by under-population.
  • Any live cell with two or three live neighbours lives on to the next generation.
  • Any live cell with more than three live neighbours dies, as if by overcrowding.
  • Any dead cell with exactly three live neighbours becomes a live cell, as if by reproduction.
Below is a simple C# console application that runs Game of life. It is by no means
the best implementation but it is a good representation of an auto-updating state machine.

class Program
    {
        static void Main(string[] args)
        {
            int[,] state = GetInputArray();
 
            for (int i = 0; i < 5; i++)
            {
                state = NexStep(state, i, (int)Math.Sqrt((double)state.LongLength));
                Console.WriteLine("**************************************************");
            }
 
            Console.ReadLine();
        }
 
        private static int[,] GetInputArray()
        {
            //int[,] state = new int[3, 3];
            //state[0, 0] = 0;
            //state[0, 1] = 1;
            //state[0, 2] = 0;
 
            //state[1, 0] = 0;
            //state[1, 1] = 1;
            //state[1, 2] = 0;
 
            //state[2, 0] = 0;
            //state[2, 1] = 1;
            //state[2, 2] = 0;
 
            int[,] state = new int[5, 5];
            state[0, 0] = 0;
            state[0, 1] = 1;
            state[0, 2] = 1;
            state[0, 3] = 1;
            state[0, 4] = 0;
 
            state[1, 0] = 0;
            state[1, 1] = 1;
            state[1, 2] = 0;
            state[1, 3] = 1;
            state[1, 4] = 0;
 
            state[2, 0] = 0;
            state[2, 1] = 1;
            state[2, 2] = 0;
            state[2, 3] = 1;
            state[2, 4] = 0;
 
            state[3, 0] = 0;
            state[3, 1] = 1;
            state[3, 2] = 1;
            state[3, 3] = 1;
            state[3, 4] = 0;
 
            state[4, 0] = 1;
            state[4, 1] = 1;
            state[4, 2] = 0;
            state[4, 3] = 1;
            state[4, 4] = 0;
            
            return state;
        }
 
        private static int[,] NexStep(int[,] state, int stepCounter, int arrSize)
        {
            int[,] newState= null;
 
            if (stepCounter > 0)
                newState = GetNextState(state,arrSize);
            else
                newState = state;
 
            PrintCurrentState(newState, arrSize);
 
            return newState;
        }
 
        private static void PrintCurrentState(int[,] state, int size)
        {
            for (int i = 0; i < size; i++)
            {
                for (int j = 0; j < size; j++)
                {
                    Console.Write(state[i, j]);
                    Console.Write(" ");
                }
 
                Console.Write(Environment.NewLine);
            }
        }
 
        static int[,] GetNextState(int[,] oldState, int arrSize)
        {
            int[,] newState = new int[arrSize, arrSize];
            for (int i = 0; i < arrSize; i++)
            {
                for (int j = 0; j < arrSize; j++)
                {
                    int aliveCells = GetAliveNeighbourCount(oldState, i, j, arrSize);
                    if (aliveCells < 2 || aliveCells > 3)
                        newState[i, j] = 0;
                    if (aliveCells == 2)
                        newState[i, j] = oldState[i, j];
                    if (aliveCells == 3)
                        newState[i, j] = 1;
                }
            }
 
            return newState;
        }
 
        static int GetAliveNeighbourCount(int[,] currState, int x, int y, int arrSize)
        {
            int sumOfCells = 0;
            for(int i = x - 1; i <= x + 1; i ++)
            {
                for (int j = y - 1; j <= y + 1; j++)
                {
                    if (i < 0 || j < 0) continue;
                    if (i >= arrSize || j >= arrSize) continue;
                    if (i == x && j == y) continue;
                    sumOfCells += currState[i, j];
                }
            }
 
            return sumOfCells;
        }
    }

Wednesday 21 November 2012

Performance issue in Excel Add-in with Range.Cells


Recently I worked on an interesting problem. My friend was developing an add-in for Excel 2010 and had a requirement to read content of each cell into a DataTable. Due to some restrictions with "named range" feature, he had to resort to reading value of each cell in a selected area. The code seemed alright but for some unexplained reason it was very slow.

The add-in used standard Visual Studio template for Office add-in projects and referenced Office Interop assemblies for performing operations on an excel file. The main code used a "for" loop through each cell and used Range.Cells[rowindex, columnindex].Value to read value from cell. 

The content in the selected range of sample file was also pretty small in size as it had 26 columns and 325 rows and each cell consisted of small string values e.g. "Test". Still, the add-in took about 18-20 seconds for reading the content.

After going through some results from web search, i came across this link in which it is suggested to assign Range.Value into a local variable and loop through that instead of looping through Range.Cells. After changing the code, the add-in was able to complete the operations in less than a second :). 

Lesson: Marshaling values between managed and unmanaged space is expensive and must be minimized. 

Calls to Range.Cells[i,j].Value are expensive and on an average took 2 milliseconds per call on my machine (dual core, 8 GB RAM) for the sample file. Below is a sample code i wrote to showcase the performance gain when Range.Values are used instead of Range.Cells[i,j].Value.

static void Main(string[] args)
        {
            var app = new Application();
            Workbook workbook = null;
            try
            {
                workbook = app.Workbooks.Open(@"D:\Excel.xlsx");
                Range rng = ((Worksheet)workbook.Sheets[1]).UsedRange;

                var arrValues = rng.Value;
                int maxrow = rng.Rows.Count;
                int maxcol = rng.Columns.Count;

                System.Data.DataTable dTable = new System.Data.DataTable("test");
                for (int i = 0; i < 26; i++)
                {
                    dTable.Columns.Add();
                }

                Stopwatch stopwatch = new Stopwatch();
                
                stopwatch.Start();
                ReadExcelIntoDataTable(arrValues, maxrow, maxcol, dTable);
                stopwatch.Stop();
                Console.WriteLine(stopwatch.ElapsedMilliseconds);

                long[] arr = new long[maxrow * maxcol];
                dTable.Clear();
                stopwatch.Restart();
                ReadExcelIntoDataTable2(rng, maxrow, maxcol, dTable, arr);
                stopwatch.Stop();
                Console.WriteLine(stopwatch.ElapsedMilliseconds);

                Console.WriteLine("Total cell read time : {0}", arr.Sum());
                Console.WriteLine("Average cell read time : {0}",arr.Average());
                Console.WriteLine("Min cell read time : {0}", arr.Min());
                Console.WriteLine("Max cell read time : {0}", arr.Max());

                Console.WriteLine("Total items : {0}", maxcol * maxrow);
            }
            finally
            {
                if (workbook != null)
                {
                    workbook.Close();
                    Marshal.ReleaseComObject(workbook);
                }

                if (app != null)
                {
                    Marshal.ReleaseComObject(app);
                }
            }

            Console.ReadLine();
        }
 private static void ReadExcelIntoDataTable(dynamic arrValues, int maxrow, int maxcol, System.Data.DataTable dTable)
        {
            DataRow dRow;
            for (int i = 2; i <= maxrow; i++)
            {
                dRow = dTable.NewRow();
                for (int j = 1; j <= maxcol; j++)
                {
                    dRow[j - 1] = arrValues[i, j];
                }
                dTable.Rows.Add(dRow);
            }
        }
 
        private static void ReadExcelIntoDataTable2(Range rng, int maxrow, int maxcol, System.Data.DataTable dTable, long[] arr)
        {
            int arrCounter = 0;
            DataRow dRow;
            Stopwatch stopwatch = new Stopwatch();
            for (int i = 2; i <= maxrow; i++)
            {
                dRow = dTable.NewRow();
                for (int j = 1; j <= maxcol; j++)
                {
                    stopwatch.Restart();
                    dRow[j - 1] = rng.Cells[i, j].Value;
                    stopwatch.Stop();
                    
                    arr[arrCounter] = stopwatch.ElapsedMilliseconds;
                    arrCounter++;
                }
                dTable.Rows.Add(dRow);
            }
        }

Below is the output of the program. Notice the reduction in execution time from 24+ seconds to 41 milliseconds :)

41
24150
Total cell read time : 19818
Average cell read time : 2.34532544378698
Min cell read time : 0
Max cell read time : 208
Total items : 8450