.net – C# class to dump the memory of a process in several formats

I made this because I couldn’t find any good C# classes/libraries that allow you to dump the contents of a process’ memory into a file or a byte array. I haven’t tested this on 32 bit systems but it works in both x86 and x64 on my 64 bit system. I’ve been mostly testing this on the notepad process which executes pretty quickly, but I’ve also tried this on the steam process with 120 mb memory which worked but took around 30 seconds. How can I improve this and what bugs have I overlooked? Any feedback is appreciated, thanks.

MemoryDump.cs

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;

namespace MemoryDumper
{
    /// <summary>
    /// One contiguous memory region copied out of the target process.
    /// </summary>
    public struct Chunk
    {
        /// <summary>
        /// Region description (base address, size, state, protection) as filled in by
        /// VirtualQueryEx when the region was enumerated.
        /// </summary>
        public Dumper.MEMORY_BASIC_INFORMATION MemoryInformation;

        /// <summary>Raw bytes read from the region.</summary>
        public byte[] Bytes;
    }

    /// <summary>
    /// A snapshot assembled from a set of <see cref="Chunk"/>s, with helpers that render
    /// the combined bytes as hex and/or printable text.
    /// </summary>
    public class MemoryDump
    {
        /// <summary>
        /// Stores the chunks and concatenates their bytes, in order, into <see cref="Bytes"/>.
        /// </summary>
        /// <param name="chunks">Chunks to combine. A chunk whose <c>Bytes</c> is null contributes nothing.</param>
        /// <exception cref="ArgumentNullException"><paramref name="chunks"/> is null.</exception>
        public MemoryDump(Chunk[] chunks)
        {
            if (chunks == null)
            {
                throw new ArgumentNullException(nameof(chunks));
            }

            Chunks = chunks;

            long totalLength = 0;
            foreach (var chunk in chunks)
            {
                if (chunk.Bytes != null)
                {
                    totalLength += chunk.Bytes.LongLength;
                }
            }

            Bytes = new byte[totalLength];

            // The running offset is a long: an int would wrap silently on very large dumps.
            long offset = 0;
            foreach (var chunk in chunks)
            {
                if (chunk.Bytes == null)
                {
                    continue;
                }

                // Block copy instead of a byte-by-byte loop.
                Array.Copy(chunk.Bytes, 0L, Bytes, offset, chunk.Bytes.LongLength);
                offset += chunk.Bytes.LongLength;
            }
        }

        /// <summary>The chunks this dump was built from.</summary>
        public readonly Chunk[] Chunks;

        /// <summary>The bytes of all chunks, concatenated in chunk order.</summary>
        public readonly byte[] Bytes;

        /// <summary>
        /// The dump rendered as text: printable ASCII bytes as themselves, every other
        /// byte as '.'. A new string is built on each access; <see cref="Bytes"/> is not modified.
        /// </summary>
        public string BytesString
        {
            get
            {
                // Build into a separate buffer. Writing the substituted characters back
                // into Bytes would destroy the dump on first access.
                char[] printable = new char[Bytes.Length];
                for (int i = 0; i < printable.Length; i++)
                {
                    printable[i] = ToChar(Bytes[i]);
                }

                return new string(printable);
            }
        }

        /// <summary>
        /// Maps a byte to a displayable character.
        /// </summary>
        /// <param name="bt">The byte to convert.</param>
        /// <returns>The ASCII character for values 32..126, otherwise '.'.</returns>
        public static char ToChar(byte bt)
        {
            bool isPrintable = bt >= 32 && bt <= 126;
            return isPrintable ? (char)bt : '.';
        }

        /// <summary>Selects which representations <see cref="Save"/> writes on each line.</summary>
        [Flags]
        public enum DumpSaveOptions
        {
            // ex: 43 6F 77 4E 61 74 69 6F 6E
            BytesArray = 1,
            // ex: CowNation
            BytesString = 2,
            // ex: 43 6F 77 4E 61 74 69 6F 6E | CowNation
            Both = BytesArray | BytesString
        }

        /// <summary>
        /// Writes the dump to a text file, <paramref name="bytesPerLine"/> bytes per line.
        /// The file is created if it does not exist and overwritten if it does.
        /// </summary>
        /// <param name="filePath">Destination file path.</param>
        /// <param name="options">Whether to write the hex column, the text column, or both.</param>
        /// <param name="bytesPerLine">Number of bytes rendered on each line; must be positive.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="bytesPerLine"/> is zero or negative.</exception>
        public void Save(string filePath, DumpSaveOptions options = DumpSaveOptions.Both, int bytesPerLine = 56)
        {
            if (filePath == null)
            {
                throw new ArgumentNullException(nameof(filePath));
            }

            if (bytesPerLine <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(bytesPerLine), "bytesPerLine must be positive");
            }

            bool writeHex = (options & DumpSaveOptions.BytesArray) != 0;
            bool writeText = (options & DumpSaveOptions.BytesString) != 0;

            // using: the writer is flushed and closed even if a write throws.
            using (var writer = new StreamWriter(filePath))
            {
                var line = new StringBuilder();

                // Step by line start rather than by line count so that the final,
                // shorter line is written too instead of being dropped.
                for (long lineStart = 0; lineStart < Bytes.LongLength; lineStart += bytesPerLine)
                {
                    long lineEnd = Math.Min(lineStart + bytesPerLine, Bytes.LongLength);
                    line.Clear();

                    if (writeHex)
                    {
                        for (long i = lineStart; i < lineEnd; i++)
                        {
                            // "X2" always yields two hex digits.
                            line.Append(Bytes[i].ToString("X2")).Append(' ');
                        }
                    }

                    if (writeHex && writeText)
                    {
                        line.Append("| ");
                    }

                    if (writeText)
                    {
                        for (long i = lineStart; i < lineEnd; i++)
                        {
                            line.Append(ToChar(Bytes[i]));
                        }
                    }

                    writer.WriteLine(line.ToString());
                }
            }
        }
    }

    /// <summary>
    /// Copies the committed, read/write memory regions of another process into a
    /// <see cref="MemoryDump"/> using the Win32 VirtualQueryEx / ReadProcessMemory APIs.
    /// </summary>
    public class Dumper
    {
        #region IMPORTS
        [DllImport("kernel32.dll", SetLastError = true)]
        static extern IntPtr OpenProcess(ProcessAccessFlags dwDesiredAccess, bool bInheritHandle, int dwProcessId);

        [DllImport("kernel32.dll", SetLastError = true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        static extern bool CloseHandle(IntPtr hObject);

        // nSize is a SIZE_T in the native signature, so it is marshalled as a
        // pointer-sized integer rather than a 32-bit int.
        [DllImport("kernel32.dll", SetLastError = true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        static extern bool ReadProcessMemory(
           IntPtr hProcess,
           IntPtr lpBaseAddress,
           [Out] byte[] lpBuffer,
           IntPtr nSize,
           out IntPtr lpNumberOfBytesRead);

        [DllImport("kernel32.dll")]
        static extern void GetSystemInfo(out SYSTEM_INFO lpSystemInfo);

        // The return value and dwLength are both SIZE_T in the native signature.
        [DllImport("kernel32.dll", SetLastError = true)]
        static extern IntPtr VirtualQueryEx(IntPtr hProcess, IntPtr lpAddress, out MEMORY_BASIC_INFORMATION lpBuffer, IntPtr dwLength);
        #endregion

        #region ENUMS
        /// <summary>Win32 page protection constants (PAGE_*).</summary>
        public enum AllocationProtectEnum : uint
        {
            PAGE_EXECUTE = 0x00000010,
            PAGE_EXECUTE_READ = 0x00000020,
            PAGE_EXECUTE_READWRITE = 0x00000040,
            PAGE_EXECUTE_WRITECOPY = 0x00000080,
            PAGE_NOACCESS = 0x00000001,
            PAGE_READONLY = 0x00000002,
            PAGE_READWRITE = 0x00000004,
            PAGE_WRITECOPY = 0x00000008,
            PAGE_GUARD = 0x00000100,
            PAGE_NOCACHE = 0x00000200,
            PAGE_WRITECOMBINE = 0x00000400
        }

        /// <summary>Win32 region state constants (MEM_COMMIT / MEM_FREE / MEM_RESERVE).</summary>
        public enum StateEnum : uint
        {
            MEM_COMMIT = 0x1000,
            MEM_FREE = 0x10000,
            MEM_RESERVE = 0x2000
        }

        /// <summary>Win32 region type constants (MEM_IMAGE / MEM_MAPPED / MEM_PRIVATE).</summary>
        public enum TypeEnum : uint
        {
            MEM_IMAGE = 0x1000000,
            MEM_MAPPED = 0x40000,
            MEM_PRIVATE = 0x20000
        }

        [Flags]
        enum ProcessAccessFlags : uint
        {
            All = 0x001F0FFF,
            Terminate = 0x00000001,
            CreateThread = 0x00000002,
            VirtualMemoryOperation = 0x00000008,
            VirtualMemoryRead = 0x00000010,
            VirtualMemoryWrite = 0x00000020,
            DuplicateHandle = 0x00000040,
            CreateProcess = 0x000000080,
            SetQuota = 0x00000100,
            SetInformation = 0x00000200,
            QueryInformation = 0x00000400,
            QueryLimitedInformation = 0x00001000,
            Synchronize = 0x00100000
        }
        #endregion

        #region STRUCTS
        /// <summary>
        /// Managed mirror of the Win32 MEMORY_BASIC_INFORMATION structure; the field
        /// order must match the native layout.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        public struct MEMORY_BASIC_INFORMATION
        {
            public IntPtr BaseAddress;
            public IntPtr AllocationBase;
            public AllocationProtectEnum AllocationProtect;
            public IntPtr RegionSize;
            public StateEnum State;
            public AllocationProtectEnum Protect;
            public TypeEnum Type;
        }

        /// <summary>Managed mirror of the Win32 SYSTEM_INFO structure.</summary>
        [StructLayout(LayoutKind.Sequential)]
        public struct SYSTEM_INFO
        {
            public ushort processorArchitecture;
            ushort reserved;
            public uint pageSize;
            public IntPtr minimumApplicationAddress;
            public IntPtr maximumApplicationAddress;
            public IntPtr activeProcessorMask;
            public uint numberOfProcessors;
            public uint processorType;
            public uint allocationGranularity;
            public ushort processorLevel;
            public ushort processorRevision;
        }
        #endregion

        /// <summary>
        /// Walks the address range reported by GetSystemInfo and copies every region
        /// that is committed and whose protection is exactly PAGE_READWRITE.
        /// </summary>
        /// <param name="process">The process to dump; must not be the calling process.</param>
        /// <returns>A <see cref="MemoryDump"/> containing one <see cref="Chunk"/> per region that could be read.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="process"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="process"/> is the calling process.</exception>
        /// <exception cref="InvalidOperationException">
        /// The process could not be opened; the inner exception carries the Win32 error.
        /// </exception>
        public static MemoryDump Dump(Process process)
        {
            if (process == null)
            {
                throw new ArgumentNullException(nameof(process));
            }

            // Compare process ids. Process.GetCurrentProcess() returns a new object on
            // every call, so comparing references never detects the current process.
            using (Process current = Process.GetCurrentProcess())
            {
                if (process.Id == current.Id) // a recursive memory allocation loop happens in this case until it runs out of memory
                {
                    throw new ArgumentException("Cannot dump the memory of this process", nameof(process));
                }
            }

            SYSTEM_INFO systemInfo;
            GetSystemInfo(out systemInfo);

            long address = systemInfo.minimumApplicationAddress.ToInt64();
            long maximumAddress = systemInfo.maximumApplicationAddress.ToInt64();
            IntPtr infoSize = new IntPtr(Marshal.SizeOf(typeof(MEMORY_BASIC_INFORMATION)));

            IntPtr processHandle = OpenProcess(ProcessAccessFlags.QueryInformation | ProcessAccessFlags.VirtualMemoryRead, false, process.Id);
            if (processHandle == IntPtr.Zero)
            {
                int error = Marshal.GetLastWin32Error();
                throw new InvalidOperationException(
                    "Cannot get a handle to process",
                    new System.ComponentModel.Win32Exception(error));
            }

            List<Chunk> chunks = new List<Chunk>();
            try
            {
                while (address < maximumAddress)
                {
                    MEMORY_BASIC_INFORMATION memoryInformation;
                    IntPtr written = VirtualQueryEx(processHandle, new IntPtr(address), out memoryInformation, infoSize);

                    // A zero return means the query failed. The struct is then not
                    // filled in, and a zero RegionSize would make this loop spin forever.
                    if (written == IntPtr.Zero)
                    {
                        break;
                    }

                    long regionSize = memoryInformation.RegionSize.ToInt64();
                    if (regionSize <= 0)
                    {
                        break;
                    }

                    // check if this chunk is accessible
                    if (memoryInformation.Protect == AllocationProtectEnum.PAGE_READWRITE && memoryInformation.State == StateEnum.MEM_COMMIT)
                    {
                        byte[] buffer = new byte[regionSize];
                        IntPtr bytesRead;
                        ReadProcessMemory(processHandle, memoryInformation.BaseAddress, buffer, new IntPtr(regionSize), out bytesRead);

                        // Keep only the bytes that were actually copied. A failed read
                        // would otherwise be stored as a zero-filled chunk.
                        long readCount = Math.Min(bytesRead.ToInt64(), buffer.LongLength);
                        if (readCount > 0)
                        {
                            if (readCount < buffer.LongLength)
                            {
                                Array.Resize(ref buffer, checked((int)readCount));
                            }

                            chunks.Add(new Chunk
                            {
                                MemoryInformation = memoryInformation,
                                Bytes = buffer
                            });
                        }
                    }

                    // move to the next chunk
                    long next = address + regionSize;

                    // Stop if the address wrapped, or if it no longer fits in a 32-bit
                    // IntPtr. The original relied on catching OverflowException here.
                    if (next <= address || (IntPtr.Size == 4 && next > int.MaxValue))
                    {
                        break;
                    }

                    address = next;
                }
            }
            finally
            {
                // Always release the handle obtained from OpenProcess.
                CloseHandle(processHandle);
            }

            return new MemoryDump(chunks.ToArray());
        }
    }
}

Example Usage:

    // Dumps the first process named "notepad"; the indexer throws if none is running.
    var dump = Dumper.Dump(Process.GetProcessesByName("notepad")[0]);
    // Hex + text listing of the dump.
    dump.Save("dump.txt");
    // Text-only rendering of the dump.
    File.WriteAllText("bytesString.txt", dump.BytesString);

teamwork – Best git team process for developers working at different speeds

I’m looking for the best way to set up git for a team where some developers are very slow, and others very quick.

The current setup is, I think, pretty standard with developers each working in their own feature branch and then merging with the development branch which is then deployed to our QA environment for UA testing. It is at this stage that a lot of the junior developer’s code is being failed by the QAs and sent back to fix mistakes.

The only issue is that their failed commits will remain in the development/QA branch/environment, and any later commits to that branch will need to wait for the junior developer to fix their mistakes and be passed by QA, before the later commits can also be released to production.

Our development/QA branch is very much fixed to the single QA environment that we have, and for reasons that I won’t go into we are unable to have multiple testing environments.

Is there a standard way to pull back just the failed commits from the development/QA branch so that they don’t affect the other developers’ route to production? Or, if we have just a single testing environment, would it be better to only allow a single developer’s feature branch to be deployed there before either being passed and merged with production, or being failed and reverted to the tip of the development branch?

Thanks

stochastic processes – Can one change the dimension of a Bessel process by a Girsanov change of measure?

Recall that a (squared) Bessel process $X_t$ with the dimension $\delta_0>0$ is the solution of the SDE
$$d X_t = 2\,\sqrt{X_t}\,d W_t+\delta_0\,d t.$$
A naive application of the Girsanov Theorem seems to imply that, in some equivalent new measure (i.e., absolutely continuous w.r.t. the old one), the same process may have a different value $\delta_1$ of the dimension parameter.

I believe that this is not really possible to do rigorously, at least not for an arbitrary pair of values $\delta_0,\delta_1\geq 0$. What is the exact statement of result here — is it ever possible to change the dimension of a Bessel process by an absolutely continuous change of measure, and if so, under what conditions on $\delta_0$ and $\delta_1$?

design – Architecture issue re best IPC method for multiple file descriptors in one process

This is question about architecture in an application that uses POSIX IPC to communicate between threads. The application uses multiple threads (clients) to send data to a single receiving (server) thread. Each thread is assigned to a separate core using its affinity mask. The threads are all within a single process – no process boundaries to cross. The most important factors are performance and reliability.

Currently I use a named pipe (FIFO) to communicate between the multiple writers and the single reader. The writers all use the same file descriptor and the reader reads from a single pipe.

However, the data must be processed in core (thread) order, with core 0 first, then core 1, then core 2, etc. With only a single pipe the application must organize the incoming messages in core order which adds extra processing overhead. The messages are added to a memory buffer maintained by the server side.

A better architecture from the standpoint of the reader (server) would be to use a separate pipe/socket/shared memory (or other IPC method) for each client. The server would read from each of the client file descriptors in core order, processing each record as it comes in, then read and process data from the next core, in a round-robin fashion. That way the server does not need to organize and process the records in core order, which is expensive. The server just receives them one at a time and processes them immediately upon receipt, then read from the next core in sequence, etc. No expense of a memory buffer or the overhead of organizing the records as they come in.

My question is, given the requirement described above, which of the POSIX IPC methods would be the best and most performant solution for this situation? I’m planning to go up to as many as 64 cores, so I would need up to as many as 63 file descriptors for the client side. I don’t need bidirectional communication.

The lowest system overhead would (I think) be an anonymous pipe. The server side could simply loop through an array of file descriptors to read the data. However, I’m not clear whether an anonymous pipe can be used for threads in a single process because, “It is not very useful for a single process to use a pipe to talk to itself. In typical use, a process creates a pipe just before it forks one or more child processes.” https://www.gnu.org/software/libc/manual/html_node/Creating-a-Pipe.html#Creating-a-Pipe

I currently use named pipes, which do work with threads in a single process, and which should work with multiple file descriptors.

I have also used UNIX domain datagram sockets with a single socket. My impression is that multiple sockets may be more system overhead than I need for this situation, but they may be the most performant solution.

Finally, I have considered POSIX shared memory, where each client thread has its own shared memory object. Shared memory is often described as the fastest IPC mechanism (https://www.softprayog.in/programming/interprocess-communication-using-posix-shared-memory-in-linux)

But with shared memory, there is the problem of synchronization. While the other IPC methods are basically queues where the data can be read one record at a time, shared memory requires a synchronization object like a semaphore or spinlock. As the man pages say, “Typically, processes must synchronize their access to a shared memory object, using, for example, POSIX semaphores.” (https://www.man7.org/linux/man-pages/man7/shm_overview.7.html.)
My concern is that the extra synchronization overhead may reduce the usefulness of shared memory in this situation.

Moreover, despite being billed as the fastest method, I am concerned about possible cache contention with shared memory. “(M)any CPUs need fast access to memory and will likely cache memory, which has two complications (access time and data coherence).” https://en.wikipedia.org/wiki/Shared_memory.

I could test each of these solutions, but before I choose one it would be helpful to ask the opinions of others on which IPC method would be the best for multiple pipes/sockets/shared memory for multiple clients, as described above.

probability distributions – Quantiles of a Levy process

Let $X = \{ X_t \in {\bf R}, t \geq 0 \}$ be a 1-dimensional (real) Levy process. Suppose further that the distribution of $X_t$ is not concentrated on a grid. (This forces the distribution of $X_t$ to have a Lebesgue density).

For a fixed $p \in (0,1)$, let $Q_t(p)$ be the quantile function of $X_t$, i.e. $$ {\bf P}(X_t \leq Q_t(p)) = p. $$

For Brownian motion with drift, $X_t = B_t + \alpha t$, $Q_t(1/2)$ is a linear function of $t$. Indeed, $Q_t(1/2) = \alpha t$. For $p \neq 1/2$, $Q_t(p)$ is not a linear function.

Does any other Levy process have this property? In other words, for a general Levy process, can one find a $p$ that makes $Q_t(p)$ a linear function of time?

Approval action- reassigning the task to the approver that made a mistake during approval process

I have used the Approval process action in SharePoint Designer.
I am using the approval process to validate a list item with multiple people. The approvers are determined by a list column.

I am now wondering whether I can modify the workflow so that a task can be reassigned once it has already been completed. To explain in more detail: since I have multiple approvers for one list item, a separate task is created for each approver. I have encountered a problem when one of the approvers makes a mistake. If I restart the approval process, a new task is created for all approvers, not only for the one who made the mistake. How can I set this up so that, once a task has been approved/rejected, I can reassign it to the same approver without having to create new tasks for the same list item?

Question on probability and random process

How can one define the variance of an N-dimensional random variable $(X_1, X_2, \ldots, X_N)$? Also, how does one compute the variance of a linear combination of N random variables?

multishipping – Payment options for multi shipping checkout process

We’re in the process of upgrading to Magento 2.3.3. We’re discovering that multi shipping payments are being deprecated because of the EU Payment Service Directive (PSD2).

Does anybody have experience with a payment service that works with Magento 2.3.3 and forward that works for multi shipping? What did it take to make it work?

Any info would be greatly appreciated.

Process synchronization problem

The atomic fetch-and-set x,y instruction unconditionally sets the memory location x to 1 and fetches the old value of x in y without allowing any intervening access to the memory location x. Consider the following implementation of P and V functions on a binary semaphore S.

   /* P (acquire): busy-waits on the semaphore value using the atomic
    * fetch-and-set instruction described in the question text. */
   void P (binary_semaphore *s) { 
        unsigned y; 
        unsigned *x = &(s->value); 
        do { 
            /* Atomically sets *x to 1 and returns its previous value in y. */
            fetch-and-set x, y; 
        /* Repeat while the old value was non-zero; exit once it was 0. */
        } while (y); 
    }

    /* V (release): resets the semaphore value to 0 so that a P() spinning on
     * fetch-and-set can observe 0 and proceed. */
    void V (binary_semaphore *s) { 
        s->value = 0; 
    } 

Which one of the following is true?

  1. The implementation may not work if context switching is disabled in P()
  2. Instead of using fetch-and-set, a pair of normal load/store can be used
  3. The implementation of V is wrong
  4. The code does not implement a binary semaphore

Option 1 is correct. I am unable to understand the reason for it. Please explain.

development process – Running multiple version of code

How should I set up my project when I want to run the present version of a class against previous versions? I’m interested in issues related to code organization, file naming, and source control.

I have a stable version A of my code, and am working on some accuracy improvements for version B. (It’s a forecasting model in Python, but my question is perhaps broader than either of those things.)

To test the new changes, I’d like to run a test data set through both version A and version B. In the future I’d start on version C, and I’d like to be able to run C alongside B (and maybe A).

I have seen suggestions for feature flags. So I’d have one class, but I’d specify if I want the A or B (or C) logic. I think this would get messy quickly. It would be easier to read if I had a version with just the A code.

What’s the best way to do this?