Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /****************************    macho.h    ****************************************
  2. * Author:        Agner Fog
  3. * Date created:  2007-01-06
  4. * Last modified: 2008-05-23
  5. * Project:       objconv
  6. * Module:        macho.h
  7. * Description:
  8. * Header file for definition of data structures in 32 bit and 64 bit Mach-O object files.
  9. * Also defines class MacSymbolTableBuilder
  10. * Also defines structures for MacIntosh universal binaries
  11. *
  12. * Copyright 2006-2008 GNU General Public License http://www.gnu.org/licenses
  13. * Parts (c) 2003 Apple public source license http://www.opensource.apple.com/apsl/
  14. ***********************************************************************************/
  15. #ifndef MACHO_H
  16. #define MACHO_H
  17.  
  18. /********************** FILE HEADER **********************/
  19.  
  20. struct MAC_header_32 {      // Mach-O file header, 32-bit variant (seven uint32 fields)
  21.         uint32  magic;          // mach magic number identifier (see MAC_MAGIC_32 / MAC_CIGAM_32)
  22.         uint32  cputype;           // cpu specifier (MAC_CPU_TYPE_* constants)
  23.         uint32  cpusubtype;     // machine specifier (MAC_CPU_SUBTYPE_* constants)
  24.         uint32  filetype;       // type of file (MAC_OBJECT, MAC_EXECUTE, ...)
  25.         uint32  ncmds;          // number of load commands
  26.         uint32  sizeofcmds;     // the size of all the load commands
  27.         uint32   flags;         // flags (MAC_NOUNDEFS, MAC_INCRLINK, ...)
  28. };
  29.  
  30. struct MAC_header_64 {      // Mach-O file header, 64-bit variant: same fields as MAC_header_32 plus a trailing reserved word
  31.         uint32  magic;          // mach magic number identifier (see MAC_MAGIC_64 / MAC_CIGAM_64)
  32.         uint32  cputype;           // cpu specifier (MAC_CPU_TYPE_* constants)
  33.         uint32  cpusubtype;     // machine specifier (MAC_CPU_SUBTYPE_* constants)
  34.         uint32  filetype;       // type of file (MAC_OBJECT, MAC_EXECUTE, ...)
  35.         uint32  ncmds;          // number of load commands
  36.         uint32  sizeofcmds;     // the size of all the load commands
  37.         uint32   flags;         // flags (MAC_NOUNDEFS, MAC_INCRLINK, ...)
  38.    uint32   reserved;   // reserved for future use
  39. };
  40.  
  41.  
  42. // Constants for the magic field of the MAC_header: 32-bit, 64-bit and universal binary signatures
  43. #define MAC_MAGIC_32   0xFEEDFACE  // 32 bit little endian
  44. #define  MAC_MAGIC_64   0xFEEDFACF  // 64 bit little endian
  45. #define MAC_CIGAM_32   0xCEFAEDFE  // 32 bit big endian (MAC_MAGIC_32 with its bytes reversed)
  46. #define  MAC_CIGAM_64   0xCFFAEDFE  // 64 bit big endian (MAC_MAGIC_64 with its bytes reversed)
  47. #define MAC_CIGAM_UNIV 0xBEBAFECA  // MacIntosh universal binary
  48.  
  49. // Constants for the cputype field of the MAC_header
  50. #define MAC_CPU_TYPE_I386      7
  51. #define MAC_CPU_TYPE_X86_64    0x1000007   // MAC_CPU_TYPE_I386 | 0x1000000 (presumably the 64-bit ABI bit - confirm against Apple's machine.h)
  52. #define MAC_CPU_TYPE_ARM       12
  53. #define MAC_CPU_TYPE_SPARC     14
  54. #define MAC_CPU_TYPE_POWERPC   18
  55. #define MAC_CPU_TYPE_POWERPC64 0x1000012   // MAC_CPU_TYPE_POWERPC | 0x1000000
  56.  
  57. // Constants for the cpusubtype field of the MAC_header (only the generic "_ALL" subtype of each cpu type is defined here)
  58. #define MAC_CPU_SUBTYPE_I386_ALL     3
  59. #define MAC_CPU_SUBTYPE_X86_64_ALL   3
  60. #define MAC_CPU_SUBTYPE_ARM_ALL      0
  61. #define MAC_CPU_SUBTYPE_SPARC_ALL    0
  62. #define MAC_CPU_SUBTYPE_POWERPC_ALL  0
  63.  
  64. // Constants for the filetype field of the MAC_header
  65. #define MAC_OBJECT   0x1                /* relocatable object file */
  66. #define MAC_EXECUTE      0x2            /* demand paged executable file */
  67. #define MAC_FVMLIB       0x3            /* fixed VM shared library file */
  68. #define MAC_CORE                 0x4            /* core file */
  69. #define MAC_PRELOAD      0x5            /* preloaded executable file */
  70. #define MAC_DYLIB        0x6            /* dynamically bound shared library file */
  71. #define MAC_DYLINKER 0x7           /* dynamic link editor */
  72. #define MAC_BUNDLE       0x8            /* dynamically bound bundle file */
  73.  
  74. // Constants for the flags field of the MAC_header (bit flags, may be combined)
  75. #define MAC_NOUNDEFS                   0x1 // the object file has no undefined references, can be executed
  76. #define MAC_INCRLINK                   0x2 // the object file is the output of an incremental link against a base file and can't be link edited again
  77. #define MAC_DYLDLINK                      0x4 // the object file is input for the dynamic linker and can't be statically link edited again
  78. #define MAC_BINDATLOAD                 0x8 // the object file's undefined references are bound by the dynamic linker when loaded.
  79. #define MAC_PREBOUND                     0x10 // the file has its dynamic undefined references prebound.
  80. #define MAC_SPLIT_SEGS                0x20 // the file has its read-only and read-write segments split
  81. #define MAC_LAZY_INIT                 0x40 // the shared library init routine is to be run lazily via catching memory faults to its writeable segments (obsolete)
  82. #define MAC_TWOLEVEL                  0x80 // the image is using two-level name space bindings
  83. #define MAC_FORCE_FLAT               0x100 // the executable is forcing all images to use flat name space bindings
  84. #define MAC_NOMULTIDEFS              0x200 // this umbrella guarantees no multiple definitions of symbols in its sub-images so the two-level namespace hints can always be used
  85. #define MAC_NOFIXPREBINDING          0x400 // do not have dyld notify the prebinding agent about this executable
  86. #define MAC_PREBINDABLE              0x800 // the binary is not prebound but can have its prebinding redone. only used when MAC_PREBOUND is not set
  87. #define MAC_ALLMODSBOUND            0x1000 // indicates that this binary binds to all two-level namespace modules of its dependent libraries. only used when MAC_PREBINDABLE and MAC_TWOLEVEL are both set
  88. #define MAC_SUBSECTIONS_VIA_SYMBOLS 0x2000 // safe to divide up the sections into sub-sections via symbols for dead code stripping
  89. #define MAC_CANONICAL               0x4000 // the binary has been canonicalized via the unprebind operation
  90.  
  91. // Virtual memory protection bits. NOTE(review): presumably the values stored in the maxprot and initprot fields of the segment commands - confirm against Apple's vm_prot.h
  92. #define MAC_VM_PROT_NONE           0x00
  93. #define MAC_VM_PROT_READ           0x01
  94. #define MAC_VM_PROT_WRITE          0x02
  95. #define MAC_VM_PROT_EXECUTE        0x04
  96. #define MAC_VM_PROT_ALL            0x07   // READ | WRITE | EXECUTE
  97.  
  98. // Load commands: generic two-field prefix (type and size); the specific command structures in this file start with the same cmd and cmdsize fields
  99. struct MAC_load_command {
  100.         uint32 cmd;                      // type of load command (MAC_LC_* constants)
  101.         uint32 cmdsize;       // total size of command in bytes
  102. };
  103.  
  104. // Constants for the cmd field of all load commands, the type
  105. #define MAC_LC_REQ_DYLD  0x80000000 // This bit is added if unknown command cannot be ignored
  106. #define MAC_LC_SEGMENT          0x1 /* segment of this file to be mapped */
  107. #define MAC_LC_SYMTAB           0x2     /* link-edit stab symbol table info */
  108. #define MAC_LC_SYMSEG           0x3     /* link-edit gdb symbol table info (obsolete) */
  109. #define MAC_LC_THREAD           0x4     /* thread */
  110. #define MAC_LC_UNIXTHREAD            0x5        /* unix thread (includes a stack) */
  111. #define MAC_LC_LOADFVMLIB            0x6        /* load a specified fixed VM shared library */
  112. #define MAC_LC_IDFVMLIB         0x7     /* fixed VM shared library identification */
  113. #define MAC_LC_IDENT               0x8  /* object identification info (obsolete) */
  114. #define MAC_LC_FVMFILE          0x9     /* fixed VM file inclusion (internal use) */
  115. #define MAC_LC_PREPAGE          0xa /* prepage command (internal use) */
  116. #define MAC_LC_DYSYMTAB         0xb     /* dynamic link-edit symbol table info */
  117. #define MAC_LC_LOAD_DYLIB            0xc        /* load a dynamically linked shared library */
  118. #define MAC_LC_ID_DYLIB         0xd     /* dynamically linked shared lib identification */
  119. #define MAC_LC_LOAD_DYLINKER    0xe     /* load a dynamic linker */
  120. #define MAC_LC_ID_DYLINKER           0xf        /* dynamic linker identification */
  121. #define MAC_LC_PREBOUND_DYLIB  0x10     /* modules prebound for a dynamically linked shared library */
  122. #define MAC_LC_ROUTINES        0x11     /* image routines */
  123. #define MAC_LC_SUB_FRAMEWORK   0x12 /* sub framework */
  124. #define MAC_LC_SUB_UMBRELLA    0x13 /* sub umbrella */
  125. #define MAC_LC_SUB_CLIENT      0x14 /* sub client */
  126. #define MAC_LC_SUB_LIBRARY     0x15 /* sub library */
  127. #define MAC_LC_TWOLEVEL_HINTS  0x16 /* two-level namespace lookup hints */
  128. #define MAC_LC_PREBIND_CKSUM   0x17 /* prebind checksum */
  129. #define MAC_LC_LOAD_WEAK_DYLIB (0x18 | MAC_LC_REQ_DYLD) /* 0x18 with the MAC_LC_REQ_DYLD bit set; presumably a weak variant of MAC_LC_LOAD_DYLIB - confirm against Apple's loader.h */
  130. #define MAC_LC_SEGMENT_64      0x19 /* 64-bit segment of this file to be mapped */
  131. #define MAC_LC_ROUTINES_64     0x1a /* 64-bit image routines */
  132. #define MAC_LC_UUID            0x1b /* the uuid */
  133.  
  134. /*
  135.  * The segment load command indicates that a part of this file is to be
  136.  * mapped into the task's address space.  The size of this segment in memory,
  137.  * vmsize, may be equal to or larger than the amount to map from this file,
  138.  * filesize.  The file is mapped starting at fileoff to the beginning of
  139.  * the segment in memory, vmaddr.  The rest of the memory of the segment,
  140.  * if any, is allocated zero fill on demand.  The segment's maximum virtual
  141.  * memory protection and initial virtual memory protection are specified
  142.  * by the maxprot and initprot fields.  If the segment has sections then the
  143.  * section structures directly follow the segment command and their size is
  144.  * reflected in cmdsize.
  145.  */
  146. struct MAC_segment_command_32 { /* segment load command for 32-bit architectures; addresses, sizes and offsets are uint32 */
  147.         uint32  cmd;                  /* MAC_LC_SEGMENT */
  148.         uint32  cmdsize;              /* includes sizeof section structs */
  149.         char            segname[16];    /* segment name */
  150.         uint32  vmaddr;            /* memory address of this segment */
  151.         uint32  vmsize;            /* memory size of this segment */
  152.         uint32  fileoff;              /* file offset of this segment */
  153.         uint32  filesize;          /* amount to map from the file */
  154.         uint32  maxprot;        /* maximum VM protection (presumably MAC_VM_PROT_* bits) */
  155.         uint32  initprot;          /* initial VM protection (presumably MAC_VM_PROT_* bits) */
  156.         uint32  nsects;            /* number of sections in segment */
  157.         uint32  flags;             /* flags (MAC_SG_* constants) */
  158. };
  159.  
  160. /*
  161.  * The 64-bit segment load command indicates that a part of this file is to be
  162.  * mapped into a 64-bit task's address space.  If the 64-bit segment has
  163.  * sections then section_64 structures directly follow the 64-bit segment
  164.  * command and their size is reflected in cmdsize.
  165.  */
  166. struct MAC_segment_command_64 { /* segment load command for 64-bit architectures; vmaddr, vmsize, fileoff and filesize are uint64 */
  167.         uint32  cmd;                /* MAC_LC_SEGMENT_64 */
  168.         uint32  cmdsize;            /* includes sizeof section_64 structs */
  169.         char            segname[16]; /* segment name */
  170.         uint64  vmaddr;          /* memory address of this segment */
  171.         uint64  vmsize;          /* memory size of this segment */
  172.         uint64  fileoff;            /* file offset of this segment */
  173.         uint64  filesize;        /* amount to map from the file */
  174.         uint32  maxprot;            /* maximum VM protection (presumably MAC_VM_PROT_* bits) */
  175.         uint32  initprot;        /* initial VM protection (presumably MAC_VM_PROT_* bits) */
  176.         uint32  nsects;          /* number of sections in segment */
  177.         uint32  flags;           /* flags (MAC_SG_* constants) */
  178. };
  179.  
  180.  
  181. /* Constants for the flags field of the segment_command (MAC_segment_command_32 / MAC_segment_command_64) */
  182. #define MAC_SG_HIGHVM   0x1     // the file contents for this segment are for the high part of the
  183.                               // VM space, the low part is zero filled (for stacks in core files)
  184. #define MAC_SG_FVMLIB   0x2     // this segment is the VM that is allocated by a fixed VM library,
  185.                               // for overlap checking in the link editor
  186. #define MAC_SG_NORELOC  0x4     // this segment has nothing that was relocated in it and nothing
  187.                               // relocated to it, that is, it may be safely replaced without relocation
  188.  
  189. /*
  190.  * A segment is made up of zero or more sections.  Non-MH_OBJECT files have
  191.  * all of their segments with the proper sections in each, and padded to the
  192.  * specified segment alignment when produced by the link editor.  The first
  193.  * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header
  194.  * and load commands of the object file before its first section.  The zero
  195.  * fill sections are always last in their segment (in all formats).  This
  196.  * allows the zeroed segment padding to be mapped into memory where zero fill
  197.  * sections might be. The gigabyte zero fill sections, those with the section
  198.  * type S_GB_ZEROFILL, can only be in a segment with sections of this type.
  199.  * These segments are then placed after all other segments.
  200.  *
  201.  * The MH_OBJECT format has all of its sections in one segment for
  202.  * compactness.  There is no padding to a specified segment boundary and the
  203.  * mach_header and load commands are not part of the segment.
  204.  *
  205.  * Sections with the same section name, sectname, going into the same segment,
  206.  * segname, are combined by the link editor.  The resulting section is aligned
  207.  * to the maximum alignment of the combined sections and is the new section's
  208.  * alignment.  The combined sections are aligned to their original alignment in
  209.  * the combined section.  Any padded bytes to get the specified alignment are
  210.  * zeroed.
  211.  *
  212.  * The format of the relocation entries referenced by the reloff and nreloc
  213.  * fields of the section structure for mach object files is described in the
  214.  * header file <reloc.h>.
  215.  */
  216. struct MAC_section_32 {       /* section header for 32-bit architectures; addr and size are uint32 */
  217.         char            sectname[16];   /* name of this section */
  218.         char            segname[16];    /* segment this section goes in */
  219.         uint32  addr;                 /* memory address of this section */
  220.         uint32  size;                 /* size in bytes of this section */
  221.         uint32  offset;         /* file offset of this section */
  222.         uint32  align;             /* section alignment (power of 2) */
  223.         uint32  reloff;            /* file offset of relocation entries */
  224.         uint32  nreloc;            /* number of relocation entries */
  225.         uint32  flags;             /* flags (section type in MAC_SECTION_TYPE bits, attributes in MAC_SECTION_ATTRIBUTES bits) */
  226.         uint32  reserved1;         /* reserved (index into the indirect symbol table for symbol pointer and stub sections) */
  227.         uint32  reserved2;         /* reserved (byte size of a stub for symbol stub sections) */
  228. };
  229.  
  230. struct MAC_section_64 {    /* section header for 64-bit architectures; addr and size are uint64, plus an extra reserved3 field */
  231.         char            sectname[16];   /* name of this section */
  232.         char            segname[16];    /* segment this section goes in */
  233.         uint64  addr;                 /* memory address of this section */
  234.         uint64  size;                 /* size in bytes of this section */
  235.         uint32  offset;            /* file offset of this section */
  236.         uint32  align;             /* section alignment (power of 2) */
  237.         uint32  reloff;            /* file offset of relocation entries */
  238.         uint32  nreloc;            /* number of relocation entries */
  239.         uint32  flags;             /* flags (section type in MAC_SECTION_TYPE bits, attributes in MAC_SECTION_ATTRIBUTES bits) */
  240.         uint32  reserved1;         /* reserved (for offset or index) */
  241.         uint32  reserved2;         /* reserved (for count or sizeof) */
  242.         uint32  reserved3;         // reserved (Note: specified in loader.h, but not in MachORuntime.pdf)
  243. };
  244.  
  245.  
  246. /* The flags field of a section structure is separated into two parts: a section
  247.  * type and section attributes.  The section types are mutually exclusive (it
  248.  * can only have one type) but the section attributes are not (it may have more
  249.  * than one attribute).  */
  250.  
  251. #define MAC_SECTION_TYPE                    0x000000ff  /* mask for the low 8 bits of flags: 256 section types */
  252. #define MAC_SECTION_ATTRIBUTES   0xffffff00     /* mask for the upper 24 bits of flags: 24 section attributes */
  253.  
  254. /* Constants for the type of a section (the MAC_SECTION_TYPE part of the flags field) */
  255. #define MAC_S_REGULAR                 0x0        /* regular section */
  256. #define MAC_S_ZEROFILL                0x1        /* zero fill on demand section */
  257. #define MAC_S_CSTRING_LITERALS  0x2      /* section with only literal C strings */
  258. #define MAC_S_4BYTE_LITERALS       0x3    /* section with only 4 byte literals */
  259. #define MAC_S_8BYTE_LITERALS       0x4   /* section with only 8 byte literals */
  260. #define MAC_S_LITERAL_POINTERS  0x5      /* section with only pointers to literals */
  261.  
  262. /* For the two types of symbol pointers sections and the symbol stubs section
  263.  * they have indirect symbol table entries.  For each of the entries in the
  264.  * section the indirect symbol table entries, in corresponding order in the
  265.  * indirect symbol table, start at the index stored in the reserved1 field
  266.  * of the section structure.  Since the indirect symbol table entries
  267.  * correspond to the entries in the section the number of indirect symbol table
  268.  * entries is inferred from the size of the section divided by the size of the
  269.  * entries in the section.  For symbol pointers sections the size of the entries
  270.  * in the section is 4 bytes and for symbol stubs sections the byte size of the
  271.  * stubs is stored in the reserved2 field of the section structure. */
  272.  
  273. #define  MAC_S_NON_LAZY_SYMBOL_POINTERS 0x6  // section with only non-lazy symbol pointers
  274. #define  MAC_S_LAZY_SYMBOL_POINTERS                0x7  // section with only lazy symbol pointers
  275. #define  MAC_S_SYMBOL_STUBS                 0x8  // section with only symbol stubs, byte size of each stub is in the reserved2 field
  276. #define  MAC_S_MOD_INIT_FUNC_POINTERS      0x9  // section with only function pointers for initialization
  277. #define  MAC_S_MOD_TERM_FUNC_POINTERS      0xa  // section with only function pointers for termination
  278. #define  MAC_S_COALESCED                  0xb  // section contains symbols that are to be coalesced
  279. #define  MAC_S_GB_ZEROFILL                0xc  // zero fill on demand section that can be larger than 4 gigabytes
  280. #define  MAC_S_INTERPOSING                0xd  // section with only pairs of function pointers for interposing
  281. #define  MAC_S_16BYTE_LITERALS            0xe  // section with only 16 byte literals
  282.  
  283.  
  284. // Constants for the section attributes part of the flags field of a section structure (bit flags, may be combined).
  285.  
  286. #define MAC_SECTION_ATTRIBUTES_USR        0xff000000    /* mask: user settable attributes */
  287. #define MAC_S_ATTR_PURE_INSTRUCTIONS  0x80000000        /* section contains only true machine instructions */
  288. #define MAC_S_ATTR_NO_TOC             0x40000000        /* section contains coalesced symbols that are not to be in a ranlib table of contents */
  289. #define MAC_S_ATTR_STRIP_STATIC_SYMS  0x20000000        /* ok to strip static symbols in this section in files with the MAC_DYLDLINK flag */
  290. #define MAC_S_ATTR_NO_DEAD_STRIP      0x10000000        /* no dead stripping */
  291. #define MAC_S_ATTR_LIVE_SUPPORT       0x08000000        /* blocks are live if they reference live blocks */
  292. #define MAC_S_ATTR_SELF_MODIFYING_CODE 0x04000000       /* Used with i386 code stubs written on by dyld */
  293. #define MAC_S_ATTR_DEBUG              0x02000000        /* a debug section */
  294. #define MAC_SECTION_ATTRIBUTES_SYS        0x00ffff00    /* mask: system settable attributes */
  295. #define MAC_S_ATTR_SOME_INSTRUCTIONS  0x00000400        /* section contains some machine instructions */
  296. #define MAC_S_ATTR_EXT_RELOC            0x00000200      /* section has external relocation entries */
  297. #define MAC_S_ATTR_LOC_RELOC            0x00000100      /* section has local relocation entries */
  298.  
  299.  
  300. /* The names of segments and sections in them are mostly meaningless to the
  301.  * link-editor.  But there are a few things to support traditional UNIX
  302.  * executables that require the link-editor and assembler to use some names
  303.  * agreed upon by convention.
  304.  *
  305.  * The initial protection of the "__TEXT" segment has write protection turned
  306.  * off (not writeable).
  307.  *
  308.  * The link-editor will allocate common symbols at the end of the "__common"
  309.  * section in the "__DATA" segment.  It will create the section and segment
  310.  * if needed. */
  311.  
  312. /* The currently known segment names (MAC_SEG_*) and the section names in those segments (MAC_SECT_*) */
  313.  
  314. #define MAC_SEG_PAGEZERO            "__PAGEZERO"      // the pagezero segment which has no protections and catches NULL references for MAC_EXECUTE files
  315. #define MAC_SEG_TEXT           "__TEXT"          // the traditional UNIX text segment
  316. #define MAC_SECT_TEXT          "__text"          // the real text part of the text section, no headers and no padding
  317. #define  MAC_SECT_FVMLIB_INIT0 "__fvmlib_init0"  // the fvmlib initialization section
  318. #define  MAC_SECT_FVMLIB_INIT1 "__fvmlib_init1"  // the section following the fvmlib initialization section
  319. #define MAC_SEG_DATA           "__DATA"        // the traditional UNIX data segment
  320. #define MAC_SECT_DATA          "__data"          // the real initialized data section, no padding, no bss overlap
  321. #define MAC_SECT_BSS           "__bss"                 // the real uninitialized data section, no padding
  322. #define  MAC_SECT_COMMON            "__common"         // the section common symbols are allocated in by the link editor
  323. #define MAC_SEG_OBJC           "__OBJC"        // objective-C runtime segment
  324. #define  MAC_SECT_OBJC_SYMBOLS "__symbol_table"  // symbol table
  325. #define  MAC_SECT_OBJC_MODULES "__module_info"   // module information
  326. #define  MAC_SECT_OBJC_STRINGS "__selector_strs" // string table
  327. #define  MAC_SECT_OBJC_REFS    "__selector_refs" // string table - NOTE(review): same comment as __selector_strs; presumably selector references, confirm
  328. #define MAC_SEG_ICON           "__ICON"          // the NeXT icon segment
  329. #define MAC_SECT_ICON_HEADER  "__header"        // the icon headers
  330. #define MAC_SECT_ICON_TIFF    "__tiff"          // the icons in tiff format
  331. #define MAC_SEG_LINKEDIT            "__LINKEDIT"      // the segment containing all structs created and maintained by the link editor.  Created with -seglinkedit option to ld(1) for MAC_EXECUTE and MAC_FVMLIB file types only
  332. #define  MAC_SEG_UNIXSTACK          "__UNIXSTACK"           // the unix stack segment
  333. #define  MAC_SEG_IMPORT        "__IMPORT"        // the segment for the self (dyld) modifying code stubs that has read, write and execute permissions
  334.  
  335.  
  336. /* The symtab_command contains the offsets and sizes of the link-edit 4.3BSD
  337.  * "stab" style symbol table information as described in the header files
  338.  * <nlist.h> and <stab.h>. */
  339.  
  340. struct MAC_symtab_command {   /* load command giving offsets and sizes of the symbol table and string table */
  341.         uint32  cmd;               /* MAC_LC_SYMTAB */
  342.         uint32  cmdsize;           /* sizeof(MAC_symtab_command) */
  343.         uint32  symoff;         /* symbol table offset */
  344.         uint32  nsyms;          /* number of symbol table entries */
  345.         uint32  stroff;         /* string table offset */
  346.         uint32  strsize;           /* string table size in bytes */
  347. };
  348.  
  349. /* This is the second set of the symbolic information which is used to support
  350.  * the data structures for the dynamic link editor.
  351.  *
  352.  * The original set of symbolic information in the symtab_command which contains
  353.  * the symbol and string tables must also be present when this load command is
  354.  * present.  When this load command is present the symbol table is organized
  355.  * into three groups of symbols:
  356.  *      local symbols (static and debugging symbols) - grouped by module
  357.  *      defined external symbols - grouped by module (sorted by name if not lib)
  358.  *      undefined external symbols (sorted by name)
  359.  * In this load command there are offsets and counts to each of the three groups
  360.  * of symbols.
  361.  *
  362.  * This load command contains the offsets and sizes of the following new
  363.  * symbolic information tables:
  364.  *      table of contents
  365.  *      module table
  366.  *      reference symbol table
  367.  *      indirect symbol table
  368.  * The first three tables above (the table of contents, module table and
  369.  * reference symbol table) are only present if the file is a dynamically linked
  370.  * shared library.  For executable and object modules, which are files
  371.  * containing only one module, the information that would be in these three
  372.  * tables is determined as follows:
  373.  *      table of contents - the defined external symbols are sorted by name
  374.  *      module table - the file contains only one module so everything in the
  375.  *                     file is part of the module.
  376.  *      reference symbol table - is the defined and undefined external symbols
  377.  *
  378.  * For dynamically linked shared library files this load command also contains
  379.  * offsets and sizes to the pool of relocation entries for all sections
  380.  * separated into two groups:
  381.  *      external relocation entries
  382.  *      local relocation entries
  383.  * For executable and object modules the relocation entries continue to hang
  384.  * off the section structures.  */
  385.  
  386. struct MAC_dysymtab_command {
  387.     uint32 cmd;         /* LC_DYSYMTAB */
  388.     uint32 cmdsize;     /* sizeof(struct dysymtab_command) */
  389.  
  390.     /* The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
  391.      * are grouped into the following three groups:
  392.      *    local symbols (further grouped by the module they are from)
  393.      *    defined external symbols (further grouped by the module they are from)
  394.      *    undefined symbols
  395.      *
  396.      * The local symbols are used only for debugging.  The dynamic binding
  397.      * process may have to use them to indicate to the debugger the local
  398.      * symbols for a module that is being bound.
  399.      *
  400.      * The last two groups are used by the dynamic binding process to do the
  401.      * binding (indirectly through the module table and the reference symbol
  402.      * table when this is a dynamicly linked shared library file).    */
  403.  
  404.     uint32 ilocalsym;   // index to local symbols
  405.     uint32 nlocalsym;   // number of local symbols
  406.  
  407.     uint32 iextdefsym;  // index to externally defined symbols
  408.     uint32 nextdefsym;  // number of externally defined symbols
  409.  
  410.     uint32 iundefsym;   // index to undefined symbols
  411.     uint32 nundefsym;   // number of undefined symbols
  412.  
  413.     /* For the dynamic binding process to find which module a symbol
  414.      * is defined in the table of contents is used (analogous to the ranlib
  415.      * structure in an archive) which maps defined external symbols to modules
  416.      * they are defined in.  This exists only in a dynamicly linked shared
  417.      * library file.  For executable and object modules the defined external
  418.      * symbols are sorted by name and is use as the table of contents.     */
  419.  
  420.     uint32 tocoff;      /* file offset to table of contents */
  421.     uint32 ntoc;                /* number of entries in table of contents */
  422.  
  423.     /* To support dynamic binding of "modules" (whole object files) the symbol
  424.      * table must reflect the modules that the file was created from.  This is
  425.      * done by having a module table that has indexes and counts into the merged
  426.      * tables for each module.  The module structure that these two entries
  427.      * refer to is described below.  This exists only in a dynamicly linked
  428.      * shared library file.  For executable and object modules the file only
  429.      * contains one module so everything in the file belongs to the module.     */
  430.  
  431.     uint32 modtaboff;   /* file offset to module table */
  432.     uint32 nmodtab;        /* number of module table entries */
  433.  
  434.     /* To support dynamic module binding the module structure for each module
  435.      * indicates the external references (defined and undefined) each module
  436.      * makes.  For each module there is an offset and a count into the
  437.      * reference symbol table for the symbols that the module references.
  438.      * This exists only in a dynamicly linked shared library file.  For
  439.      * executable and object modules the defined external symbols and the
  440.      * undefined external symbols indicates the external references.     */
  441.  
  442.     uint32 extrefsymoff;  /* offset to referenced symbol table */
  443.     uint32 nextrefsyms;   /* number of referenced symbol table entries */
  444.  
  445.     /* The sections that contain "symbol pointers" and "routine stubs" have
  446.      * indexes and (implied counts based on the size of the section and fixed
  447.      * size of the entry) into the "indirect symbol" table for each pointer
  448.      * and stub.  For every section of these two types the index into the
  449.      * indirect symbol table is stored in the section header in the field
  450.      * reserved1.  An indirect symbol table entry is simply a 32bit index into
  451.      * the symbol table to the symbol that the pointer or stub is referring to.
  452.      * The indirect symbol table is ordered to match the entries in the section. */
  453.  
  454.     uint32 indirectsymoff; // file offset to the indirect symbol table
  455.     uint32 nindirectsyms;  // number of indirect symbol table entries
  456.  
  457.     /* To support relocating an individual module in a library file quickly the
  458.      * external relocation entries for each module in the library need to be
  459.      * accessed efficiently.  Since the relocation entries can't be accessed
  460.      * through the section headers for a library file they are separated into
  461.      * groups of local and external entries further grouped by module.  In this
  462.      * case the presents of this load command who's extreloff, nextrel,
  463.      * locreloff and nlocrel fields are non-zero indicates that the relocation
  464.      * entries of non-merged sections are not referenced through the section
  465.      * structures (and the reloff and nreloc fields in the section headers are
  466.      * set to zero).
  467.      *
  468.      * Since the relocation entries are not accessed through the section headers
  469.      * this requires the r_address field to be something other than a section
  470.      * offset to identify the item to be relocated.  In this case r_address is
  471.      * set to the offset from the vmaddr of the first LC_SEGMENT command.
  472.      *
  473.      * The relocation entries are grouped by module and the module table
  474.      * entries have indexes and counts into them for the group of external
  475.      * relocation entries for that the module.
  476.      *
  477.      * For sections that are merged across modules there must not be any
  478.      * remaining external relocation entries for them (for merged sections
  479.      * remaining relocation entries must be local).     */
  480.  
  481.     uint32 extreloff;   /* offset to external relocation entries */
  482.     uint32 nextrel;        /* number of external relocation entries */
  483.  
  484.     /* All the local relocation entries are grouped together (they are not
  485.      * grouped by their module since they are only used if the object is moved
  486.      * from its statically link edited address).     */
  487.  
  488.     uint32 locreloff;   /* offset to local relocation entries */
  489.     uint32 nlocrel;     /* number of local relocation entries */
  490.  
  491. };     
  492.  
  493. /* An indirect symbol table entry is simply a 32bit index into the symbol table
  494.  * to the symbol that the pointer or stub is referring to.  Unless it is for a
  495.  * non-lazy symbol pointer section for a defined symbol which strip(1) has
  496.  * removed.  In which case it has the value INDIRECT_SYMBOL_LOCAL.  If the
  497.  * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that. */
  498.  
  499. #define MAC_INDIRECT_SYMBOL_LOCAL  0x80000000  // entry for a non-lazy symbol pointer whose defined symbol was removed by strip(1)
  500. #define MAC_INDIRECT_SYMBOL_ABS    0x40000000  // or'ed with MAC_INDIRECT_SYMBOL_LOCAL if the removed symbol was also absolute
  501.  
  502. // Relocation entries
  503. /* Format of a relocation entry of a Mach-O file.  Modified from the 4.3BSD
  504.  * format.  The modifications from the original format were changing the value
  505.  * of the r_symbolnum field for "local" (r_extern == 0) relocation entries.
  506.  * This modification is required to support symbols in an arbitrary number of
  507.  * sections not just the three sections (text, data and bss) in a 4.3BSD file.
  508.  * Also the last 4 bits have had the r_type tag added to them. */
  509.  
  510. #define R_SCATTERED 0x80000000  // mask to be applied to the r_address field of a relocation_info structure to tell that
  511.                                  // it is really a scattered_relocation_info structure
  512.  
  513. struct MAC_relocation_info {
  514.    uint32  r_address;      // offset in the section to what is being relocated (source); see R_SCATTERED for the top bit
  515.    uint32  r_symbolnum:24, // symbol table index (0-based) if r_extern == 1 or section number (1-based) if r_extern == 0
  516.            r_pcrel:1,      // 1 = pc relative. The target address (inline) is already pc relative
  517.            r_length:2,     // 0=byte, 1=word, 2=dword, 3=qword (as in MAC_scattered_relocation_info)
  518.            r_extern:1,     // r_extern = 1 for symbols in symbol table, 0 for references by section number
  519.            r_type:4;       // if not 0, machine specific relocation type (MAC32_RELOC_... or MAC64_RELOC_...)
  520. };                         // The inline value of the source is the target address (pc-relative
  521.                            // or absolute) if r_extern = 0, or an addend if r_extern = 1.
  522.  
  523. struct MAC_scattered_relocation_info {
  524.    uint32  r_address:24,   // offset in the section to what is being relocated (source)
  525.            r_type:4,       // if not 0, machine specific relocation type (MAC32_RELOC_...)
  526.            r_length:2,     // 0=byte, 1=word, 2=dword, 3=qword
  527.            r_pcrel:1,      // 1 = pc relative. The target address is already pc relative
  528.            r_scattered:1;  // 1=scattered, 0=non-scattered (see R_SCATTERED above)
  529.    int32   r_value;        // target address (without any offset added. The offset is stored inline in the source)
  530. };
  531.  
  532. // 32-bit relocation types:
  533. /* Relocation types used in a generic implementation.  Relocation entries for
  534.  * normal things use the generic relocation as described above and their r_type
  535.  * is GENERIC_RELOC_VANILLA (a value of zero).
  536.  *
  537.  * Another type of generic relocation, GENERIC_RELOC_SECTDIFF, is to support
  538.  * the difference of two symbols defined in different sections.  That is the
  539.  * expression "symbol1 - symbol2 + constant" is a relocatable expression when
  540.  * both symbols are defined in some section.  For this type of relocation
  541.  * both relocations entries are scattered relocation entries.  The value of
  542.  * symbol1 is stored in the first relocation entry's r_value field and the
  543.  * value of symbol2 is stored in the pair's r_value field.
  544.  *
  545.  * A special case for a prebound lazy pointer is needed to be able to set the
  546.  * value of the lazy pointer back to its non-prebound state.  This is done
  547.  * using the GENERIC_RELOC_PB_LA_PTR r_type.  This is a scattered relocation
  548.  * entry where the r_value field is the value of the lazy pointer not prebound. */
  549.  
  550. /* My interpretation (A Fog):
  551.    32-bit: Objects are not addressed by their offset into the section but by
  552.    their "absolute" address. This "absolute" address has no reality.
  553.    It is the address that the object would have if the section was placed
  554.    at the address specified in the addr field of the section header.
  555.    Scattered:
  556.    The first record, of type MAC32_RELOC_SECTDIFF or MAC32_RELOC_LOCAL_SECTDIFF
  557.    contains the "absolute" address of a first reference point, let's call it ref1,
  558.    in the r_value field. The second record, of type MAC32_RELOC_PAIR contains the
  559.    "absolute" address of a second reference point, ref2, in the r_value field.
  560.    The inline value is the "absolute" address of the relocation target minus ref2.
  561.    ref1 is often = target, but may be any label preceding the target. The linker
  562.    has to add (ref1 - ref2) in image minus (ref1 - ref2) in object file to the
  563.    inline value. The relocation source (the position of the inline field) is
  564.    given in r_address in the first record, relative to the section.
  565.    Non-scattered, absolute, r_extern = 1:
  566.    r_symbolnum = symbol index (0-based)
  567.    Non-scattered, absolute, r_extern = 0:
  568.    r_symbolnum = section index, inline = absolute address of target?
  569.    Non-scattered, r_pcrel = 1, r_extern = 1:
  570.    r_symbolnum = symbol index (0-based)
  571.    Inline = source absolute address - 4
  572.    Non-scattered, r_pcrel = 1, r_extern = 0:
  573.    r_symbolnum = section index,
  574.    inline = absolute address of target - absolute address of source - 4
  575. */
  576.  
  577. #define MAC32_RELOC_VANILLA        0   // A generic relocation entry for both addresses contained in data
  578.                                        // and addresses contained in CPU instructions.
  579. #define MAC32_RELOC_PAIR           1   // The second relocation entry of a pair. Follows a MAC32_RELOC_SECTDIFF or MAC32_RELOC_LOCAL_SECTDIFF entry
  580. #define MAC32_RELOC_SECTDIFF       2   // A relocation entry for an item that contains the difference of
  581.                                        // two section addresses. This is generally used for position-independent code generation.
  582. #define MAC32_RELOC_PB_LA_PTR      3   // A relocation entry for a prebound lazy pointer. This is always
  583.                                        // a scattered relocation entry. The r_value field contains the non-prebound value of the lazy pointer.
  584. #define MAC32_RELOC_LOCAL_SECTDIFF 4   // Similar to MAC32_RELOC_SECTDIFF except that this entry refers specifically to the address in this item.
  585.                                        // If the address is that of a globally visible coalesced symbol, this relocation entry does not change if the symbol is overridden.
  586.                                        // This is used to associate stack unwinding information with the object code this relocation entry describes.
  587.  
  588. // 64-bit relocation types:
  589. // Scattered relocations are not used in 64-bit Mach-O.
  590. // reloc.h says that references to local symbols are made by the nearest
  591. // preceding public symbol + displacement, but my experiments show that
  592. // local symbol records are used, which of course is easier.
  593. // r_extern = 1 is used even for non-external symbols!
  594. // The target address is not stored inline. The -4 offset for self-relative
  595. // addresses is implicit, unlike in 32-bit Mach-O. If the difference
  596. // between source address and instruction pointer is e.g. -5, then the
  597. // -4 is implicit, and the -1 is explicit!
  598.  
  599. #define MAC64_RELOC_UNSIGNED       0   // absolute address, 32 or 64 bits
  600. #define MAC64_RELOC_SIGNED         1   // signed 32-bit displacement with implicit -4 addend
  601. #define MAC64_RELOC_BRANCH         2   // same as MAC64_RELOC_SIGNED, used for CALL and JMP instructions
  602. #define MAC64_RELOC_GOT_LOAD       3   // self-relative load of a GOT entry
  603. #define MAC64_RELOC_GOT            4   // other GOT references
  604. #define MAC64_RELOC_SUBTRACTOR     5   // must be followed by a MAC64_RELOC_UNSIGNED (Apple's name: X86_64_RELOC_UNSIGNED)
  605. #define MAC64_RELOC_SIGNED_1       6   // signed 32-bit displacement with implicit -4 addend and explicit -1 addend
  606. #define MAC64_RELOC_SIGNED_2       7   // signed 32-bit displacement with implicit -4 addend and explicit -2 addend
  607. #define MAC64_RELOC_SIGNED_4       8   // signed 32-bit displacement with implicit -4 addend and explicit -4 addend
  608.  
  609.  
  610. // Symbol table entries
  611. /* Format of a symbol table entry of a Mach-O file.  Modified from the BSD
  612.  * format.  The modifications from the original format were changing n_other
  613.  * (an unused field) to n_sect and the addition of the N_SECT type.  These
  614.  * modifications are required to support symbols in an arbitrary number of
  615.  * sections not just the three sections (text, data and bss) in a BSD file. */
  616.  
  617. struct MAC_nlist_32 {
  618.    uint32  n_strx;   // index into the string table (0 means a null name, "")
  619.    uint8   n_type;   // type flags, see the MAC_N_STAB, MAC_N_PEXT, MAC_N_TYPE and MAC_N_EXT masks below
  620.    uint8   n_sect;   // section number (1-based) or MAC_NO_SECT
  621.    int16   n_desc;   // reference type and flags (MAC_REF_..., MAC_N_WEAK_... etc.); for stabs see <mach-o/stab.h>
  622.    uint32  n_value;  // value of this symbol (or stab offset), 32 bits
  623. };
  624.  
  625. struct MAC_nlist_64 {
  626.    uint32  n_strx;   // index into the string table (0 means a null name, "")
  627.    uint8   n_type;   // type flags, see the MAC_N_STAB, MAC_N_PEXT, MAC_N_TYPE and MAC_N_EXT masks below
  628.    uint8   n_sect;   // section number (1-based) or MAC_NO_SECT
  629.    int16   n_desc;   // reference type and flags (MAC_REF_..., MAC_N_WEAK_... etc.); for stabs see <mach-o/stab.h>
  630.    uint64  n_value;  // value of this symbol (or stab offset), 64 bits
  631. };
  632.  
  633. /* Symbols with an index into the string table of zero are
  634.  * defined to have a null, "", name.  */
  635.  
  636. /* The n_type field really contains four fields:
  637. *      unsigned char N_STAB:3,
  638. *                    N_PEXT:1,
  639. *                    N_TYPE:3,
  640. *                    N_EXT:1;
  641. * which are used via the following masks. */
  642.  
  643. #define MAC_N_STAB  0xe0  /* if any of these bits is set, the entry is a symbolic debugging entry (stab) */
  644. #define MAC_N_PEXT  0x10  /* private external symbol bit */
  645. #define MAC_N_TYPE  0x0e  /* mask for the type bits, see MAC_N_UNDF ... MAC_N_INDR */
  646. #define MAC_N_EXT   0x01  /* external symbol bit, set for external symbols */
  647.  
  648. /* Only symbolic debugging entries have some of the N_STAB bits set and if any
  649.  * of these bits are set then it is a symbolic debugging entry (a stab).  In
  650.  * which case then the values of the n_type field (the entire field) are given
  651.  * in <mach-o/stab.h> */
  652.  
  653. // Values for N_TYPE bits of the n_type field.
  654. #define MAC_N_UNDF  0x0   // undefined, n_sect == MAC_NO_SECT
  655. #define MAC_N_ABS   0x2   // absolute, n_sect == MAC_NO_SECT
  656. #define MAC_N_SECT  0xe   // defined in section number n_sect
  657. #define MAC_N_PBUD  0xc   // prebound undefined (defined in a dylib)
  658. #define MAC_N_INDR  0xa   // indirect: same as another symbol whose name is at string table index n_value
  659.  
  660. /* If the type is MAC_N_INDR then the symbol is defined to be the same as another
  661.  * symbol.  In this case the n_value field is an index into the string table
  662.  * of the other symbol's name.  When the other symbol is defined then they both
  663.  * take on the defined type and value. */
  664.  
  665. /* If the type is MAC_N_SECT then the n_sect field contains an ordinal of the
  666.  * section the symbol is defined in.  The sections are numbered from 1 and
  667.  * refer to sections in order they appear in the load commands for the file
  668.  * they are in.  This means the same ordinal may very well refer to different
  669.  * sections in different files.
  670.  *
  671.  * The n_value field for all symbol table entries (including N_STAB's) gets
  672.  * updated by the link editor based on the value of its n_sect field and where
  673.  * the section n_sect references gets relocated.  If the value of the n_sect
  674.  * field is NO_SECT then its n_value field is not changed by the link editor. */
  675.  
  676. #define MAC_NO_SECT         0       // symbol is not in any section
  677. #define MAC_MAX_SECT        255     // highest section number; sections are numbered 1 thru 255 inclusive
  678.  
  679. /* Common symbols are represented by undefined (N_UNDF) external (N_EXT) types
  680.  * whose values (n_value) are non-zero.  In which case the value of the n_value
  681.  * field is the size (in bytes) of the common symbol.  The n_sect field is set
  682.  * to NO_SECT. */
  683.  
  684. /* To support the lazy binding of undefined symbols in the dynamic link-editor,
  685.  * the undefined symbols in the symbol table (the nlist structures) are marked
  686.  * with the indication if the undefined reference is a lazy reference or
  687.  * non-lazy reference.  If both a non-lazy reference and a lazy reference is
  688.  * made to the same symbol the non-lazy reference takes precedence.  A reference
  689.  * is lazy only when all references to that symbol are made through a symbol
  690.  * pointer in a lazy symbol pointer section.
  691.  *
  692.  * The implementation of marking nlist structures in the symbol table for
  693.  * undefined symbols will be to use some of the bits of the n_desc field as a
  694.  * reference type.  The mask REFERENCE_TYPE will be applied to the n_desc field
  695.  * of an nlist structure for an undefined symbol to determine the type of
  696.  * undefined reference (lazy or non-lazy).
  697.  *
  698.  * The constants for the REFERENCE FLAGS are propagated to the reference table
  699.  * in a shared library file.  In that case the constant for a defined symbol,
  700.  * REFERENCE_FLAG_DEFINED, is also used. */
  701.  
  702. /* Mask for the reference type bits of the n_desc field of undefined symbols */
  703. #define MAC_REF_TYPE                                  0xf
  704. /* Types of references (values of n_desc & MAC_REF_TYPE) */
  705. #define MAC_REF_FLAG_UNDEFINED_NON_LAZY               0  // undefined, non-lazy reference
  706. #define MAC_REF_FLAG_UNDEFINED_LAZY                   1  // undefined, lazy reference
  707. #define MAC_REF_FLAG_DEFINED                          2  // defined symbol (used in the reference table of a shared library file)
  708. #define MAC_REF_FLAG_PRIVATE_DEFINED                  3
  709. #define MAC_REF_FLAG_PRIVATE_UNDEFINED_NON_LAZY       4
  710. #define MAC_REF_FLAG_PRIVATE_UNDEFINED_LAZY           5
  711.  
  712. /* To simplify stripping of objects that are used with the dynamic link
  713.  * editor, the static link editor marks the symbols defined in an object that are
  714.  * referenced by a dynamically bound object (dynamic shared libraries, bundles).
  715.  * With this marking strip knows not to strip these symbols. */
  716.  
  717. /* The non-reference type bits of the n_desc field for global symbols are
  718.  * reserved for the dynamic link editor.  All of these bits must start out
  719.  * zero in the object file. */
  720.  
  721.  
  722. // Additional n_desc flags
  723. #define MAC_REFERENCED_DYNAMICALLY 0x10  // Must be set for any defined symbol that is referenced by dynamic-loader APIs (such as dlsym and NSLookupSymbolInImage) and not ordinary
  724.                                          // undefined symbol references. The strip tool uses this bit to avoid removing symbols that must exist: If the symbol has this bit set, strip does not strip it.
  725.  
  726. #define MAC_N_DESC_DISCARDED       0x20  // Sometimes used by the dynamic linker at runtime in a fully linked image. Do not set this bit in a fully linked image.
  727. //#define MAC_N_DESC_DISCARDED 0x8000
  728.  
  729. #define MAC_N_NO_DEAD_STRIP        0x20  // When set in a relocatable object file (file type MH_OBJECT) on a defined symbol,
  730.                                          // indicates to the static linker to never dead-strip the symbol. (Note that the same bit (0x20) is used for two nonoverlapping purposes.)
  731.  
  732. #define MAC_N_WEAK_REF             0x40  // Indicates that this undefined symbol is a weak reference. If the dynamic linker cannot find a definition
  733.                                          // for this symbol, it sets the address of this symbol to 0. The static linker sets this symbol given the appropriate weak-linking flags.
  734.  
  735. #define MAC_N_WEAK_DEF             0x80  // Indicates that this symbol is a weak definition. If the static linker or the dynamic linker finds another
  736.                                          // (non-weak) definition for this symbol, the weak definition is ignored. Only symbols in a coalesced section (page 21) can be marked as a weak definition.
  737.  
  738. // Data structure used when sorting symbol table for Mach-O file in MacSymbolTableBuilder
  739. template <class TMAC_nlist>
  740. struct MacSymbolRecord : public TMAC_nlist {   // Symbol table entry (TMAC_nlist) extended with sorting information
  741.    uint32 Name;                        // Index of symbol name in MacSymbolTableBuilder::StringBuffer
  742.    int OldIndex;                       // Old symbol index (before sorting; see MacSymbolTableBuilder::TranslateIndex)
  743. };
  744.  
  745. // Class for building and storing symbol table, sorted or unsorted
  746. template <class TMAC_nlist, class MInt>
  747. class MacSymbolTableBuilder : public CMemoryBuffer {
  748.    int sorted;                                   // Remember if list is sorted
  749.    CMemoryBuffer StringBuffer;                   // Temporary storage of symbol names (indexed by MacSymbolRecord::Name)
  750. public:
  751.    MacSymbolTableBuilder();                      // Constructor
  752.    void AddSymbol(int OldIndex, const char * name, int type, int Desc, int section, MInt value); // Add symbol to list
  753.    void SortList();                              // Sort the list
  754.    int TranslateIndex(int OldIndex);             // Translate old index to new index, after sorting
  755.    void StoreList(CMemoryBuffer * SymbolTable, CMemoryBuffer * StringTable); // Store sorted list in the two buffers supplied by the caller
  756.    int Search(const char * name);                // Search for name. Returns -1 if not found (presumably the symbol index otherwise - confirm)
  757.    MacSymbolRecord<TMAC_nlist> & operator[] (uint32 i);      // Access symbol record number i
  758. };
  759.  
  760. // structures for MacIntosh universal binaries
  761. struct MAC_UNIV_FAT_HEADER {           // File header for universal binary
  762.    uint32 magic;                       // Magic number 0xCAFEBABE, stored big endian
  763.    uint32 num_arch;                    // Number of members (MAC_UNIV_FAT_ARCH records), stored big endian
  764. };
  765.  
  766. struct MAC_UNIV_FAT_ARCH {             // Member pointer. NOTE(review): fields presumably big endian like MAC_UNIV_FAT_HEADER - confirm
  767.    uint32 cputype;                     // cpu type of member
  768.    uint32 cpusubtype;                  // cpu subtype of member
  769.    uint32 offset;                      // file offset of member
  770.    uint32 size;                        // size of member
  771.    uint32 align;                       // alignment in file = 2^align
  772. };
  773.  
  774. // Structure used for list of sections that have relocations during disassembly
  775. struct MAC_SECT_WITH_RELOC {
  776.    int32  Section;                     // Section index
  777.    uint32 SectOffset;                  // File offset of section binary data
  778.    uint32 NumReloc;                    // Number of relocation records for this section
  779.    uint32 ReltabOffset;                // File offset of relocation table for this section
  780. };
  781.  
  782. /********************** Strings **********************/
  783. #define MAC_CONSTRUCTOR_NAME    "__mod_init_func"  // Name of the section containing constructors
  784.  
  785.  
  786. // Macros listing all word-size dependent structures, used as template parameter list
  787. #define MACSTRUCTURES    TMAC_header,   TMAC_segment_command,   TMAC_section,   TMAC_nlist, MInt  // generic template parameter names
  788. #define MAC32STRUCTURES  MAC_header_32, MAC_segment_command_32, MAC_section_32, MAC_nlist_32, int32  // template arguments for 32-bit Mach-O
  789. #define MAC64STRUCTURES  MAC_header_64, MAC_segment_command_64, MAC_section_64, MAC_nlist_64, int64  // template arguments for 64-bit Mach-O
  790.  
  791. #endif // #ifndef MACHO_H
  792.