root/trunk/src/libmach.c

Revision 543, 20.6 kB (checked in by walter, 2 years ago)

sometimes user_id is more than 6 digits

  • Property svn:eol-style set to native
Line 
1 // Compiler implementation of the D programming language
2 // Copyright (c) 1999-2009 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // http://www.digitalmars.com
6 // License for redistribution is by either the Artistic License
7 // in artistic.txt, or the GNU General Public License in gnu.txt.
8 // See the included readme.txt for details.
9
10 /* Implements object library reading and writing in the Mach-O object
11  * module format. While the format is
12  * equivalent to the Linux arch format, it differs in many details.
13  * This format is described in the Apple document
14  * "Mac OS X ABI Mach-O File Format Reference" dated 2007-04-26
15  * in the section "Static Archive Libraries".
16  * That specification is only about half complete and has numerous
17  * errors, so use the source code here as a better guide.
18  */
19
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
27
28 #include "mach.h"
29
30 #include "rmem.h"
31 #include "root.h"
32 #include "stringtable.h"
33
34 #include "mars.h"
35 #include "lib.h"
36
37 #define LOG 0
38
39 Library::Library()
40 {
41     libfile = NULL;
42 }
43
44 /***********************************
45  * Set the library file name based on the output directory
46  * and the filename.
47  * Add default library file name extension.
48  */
49
50 void Library::setFilename(char *dir, char *filename)
51 {
52 #if LOG
53     printf("Library::setFilename(dir = '%s', filename = '%s')\n",
54         dir ? dir : "", filename ? filename : "");
55 #endif
56     char *arg = filename;
57     if (!arg || !*arg)
58     {   // Generate lib file name from first obj name
59         char *n = (char *)global.params.objfiles->data[0];
60
61         n = FileName::name(n);
62         FileName *fn = FileName::forceExt(n, global.lib_ext);
63         arg = fn->toChars();
64     }
65     if (!FileName::absolute(arg))
66         arg = FileName::combine(dir, arg);
67     FileName *libfilename = FileName::defaultExt(arg, global.lib_ext);
68
69     libfile = new File(libfilename);
70 }
71
72 void Library::write()
73 {
74     if (global.params.verbose)
75         printf("library   %s\n", libfile->name->toChars());
76
77     OutBuffer libbuf;
78     WriteLibToBuffer(&libbuf);
79
80     // Transfer image to file
81     libfile->setbuffer(libbuf.data, libbuf.offset);
82     libbuf.extractData();
83
84
85     char *p = FileName::path(libfile->name->toChars());
86     FileName::ensurePathExists(p);
87     //mem.free(p);
88
89     libfile->writev();
90 }
91
92 /*****************************************************************************/
93
94 void Library::addLibrary(void *buf, size_t buflen)
95 {
96     addObject(NULL, buf, buflen);
97 }
98
99
100 /*****************************************************************************/
101 /*****************************************************************************/
102
103 static uint32_t mach_signature = MH_MAGIC;      // Mach-O file signature
104
/***********************************
 * Store the low 32 bits of value into buffer[0..3],
 * least significant byte first.
 */
void sputl(int value, void* buffer)
{
    unsigned char *out = static_cast<unsigned char *>(buffer);
    for (int byte = 0; byte < 4; ++byte)
        out[byte] = (unsigned char)(value >> (8 * byte));
}
113
/***********************************
 * Read a 32 bit value stored least significant byte first
 * in buffer[0..3] (the inverse of sputl()).
 *
 * The bytes are combined as unsigned so that a top byte >= 0x80 does not
 * shift a signed int into its sign bit; the result is then converted to
 * int, yielding a negative number for such inputs.
 */
int sgetl(void* buffer)
{
    const unsigned char *p = (const unsigned char *)buffer;
    unsigned value = ((unsigned)p[3] << 24) |
                     ((unsigned)p[2] << 16) |
                     ((unsigned)p[1] << 8)  |
                      (unsigned)p[0];
    return (int)value;
}
119
120
/***********************************
 * Bookkeeping for one member of the library being built.
 *
 * When scan is 1, base/length describe a bare object module and a member
 * header is generated for it on output. When scan is 0 the module was
 * taken from an existing archive, and base/length cover its original
 * member header as well as its contents.
 */
struct ObjModule
{
    unsigned char *base;        // start of the module's bytes in memory
    unsigned int length;        // number of bytes at base
    unsigned int offset;        // position of the module in the output library
    char *name;                 // module name (file name)
    long file_time;             // time stamp recorded in the member header
    unsigned int user_id;       // owner id recorded in the member header
    unsigned int group_id;      // group id recorded in the member header
    unsigned int file_mode;     // mode recorded in the member header (written in octal)
    int scan;                   // 1 means scan the module for symbols
};
133
/***********************************
 * On-disk header that precedes every archive member.
 * All fields are ASCII text, padded on the right with spaces and not
 * NUL terminated (see OmToHeader()). The field sizes fix the on-disk
 * layout and must not be changed.
 */
#define OBJECT_NAME_SIZE 16

struct Header
{
    char object_name[OBJECT_NAME_SIZE];  // member name, or "#1/<n>" when the name follows the header
    char file_time[12];                  // time stamp, decimal
    char user_id[6];                     // owner id, decimal
    char group_id[6];                    // group id, decimal
    char file_mode[8];                   // file mode, octal
    char file_size[10];                  // size of what follows the header, decimal
    char trailer[2];                     // always "`\n"
};
145
146 void OmToHeader(Header *h, ObjModule *om)
147 {
148     size_t slen = strlen(om->name);
149     int nzeros = 8 - ((slen + 4) & 7);
150     if (nzeros < 4)
151         nzeros += 8;            // emulate mysterious behavior of ar
152
153     size_t len = sprintf(h->object_name, "#1/%ld", slen + nzeros);
154     memset(h->object_name + len, ' ', OBJECT_NAME_SIZE - len);
155
156     /* In the following sprintf's, don't worry if the trailing 0
157      * that sprintf writes goes off the end of the field. It will
158      * write into the next field, which we will promptly overwrite
159      * anyway. (So make sure to write the fields in ascending order.)
160      */
161
162     len = sprintf(h->file_time, "%lu", om->file_time);
163     assert(len <= 12);
164     memset(h->file_time + len, ' ', 12 - len);
165
166     if (om->user_id > 999999)           // yes, it happens
167         om->user_id = 0;                // don't really know what to do here
168     len = sprintf(h->user_id, "%u", om->user_id);
169     assert(len <= 6);
170     memset(h->user_id + len, ' ', 6 - len);
171
172     if (om->group_id > 999999)          // yes, it happens
173         om->group_id = 0;               // don't really know what to do here
174     len = sprintf(h->group_id, "%u", om->group_id);
175     assert(len <= 6);
176     memset(h->group_id + len, ' ', 6 - len);
177
178     len = sprintf(h->file_mode, "%o", om->file_mode);
179     assert(len <= 8);
180     memset(h->file_mode + len, ' ', 8 - len);
181
182     int filesize = om->length;
183     filesize = (filesize + 7) & ~7;
184     len = sprintf(h->file_size, "%lu", slen + nzeros + filesize);
185     assert(len <= 10);
186     memset(h->file_size + len, ' ', 10 - len);
187
188     h->trailer[0] = '`';
189     h->trailer[1] = '\n';
190 }
191
192 void Library::addSymbol(ObjModule *om, char *name, int pickAny)
193 {
194 #if LOG
195     printf("Library::addSymbol(%s, %s, %d)\n", om->name, name, pickAny);
196 #endif
197 #if 0 // let linker sort out duplicates
198     StringValue *s = tab.insert(name, strlen(name));
199     if (!s)
200     {   // already in table
201         if (!pickAny)
202         {   s = tab.lookup(name, strlen(name));
203             assert(s);
204             ObjSymbol *os = (ObjSymbol *)s->ptrvalue;
205             error("multiple definition of %s: %s and %s: %s",
206                 om->name, name, os->om->name, os->name);
207         }
208     }
209     else
210     {
211         ObjSymbol *os = new ObjSymbol();
212         os->name = strdup(name);
213         os->om = om;
214         s->ptrvalue = (void *)os;
215
216         objsymbols.push(os);
217     }
218 #else
219     ObjSymbol *os = new ObjSymbol();
220     os->name = strdup(name);
221     os->om = om;
222     objsymbols.push(os);
223 #endif
224 }
225
226 /************************************
227  * Scan single object module for dictionary symbols.
228  * Send those symbols to Library::addSymbol().
229  */
230
231 void Library::scanObjModule(ObjModule *om)
232 {
233 #if LOG
234     printf("Library::scanObjModule(%s)\n", om->name);
235 #endif
236     unsigned char *buf = (unsigned char *)om->base;
237     size_t buflen = om->length;
238     int reason = 0;
239
240     struct mach_header *header = (struct mach_header *)buf;
241
242     /* First do sanity checks on object file
243      */
244     if (buflen < sizeof(struct mach_header))
245     {
246       Lcorrupt:
247         error("Mach-O object module %s corrupt, %d", om->name, reason);
248         return;
249     }
250     if (header->magic != MH_MAGIC)
251     {   reason = 1;
252         goto Lcorrupt;
253     }
254     if (header->cputype != CPU_TYPE_I386)
255     {
256         error("Mach-O object module %s has cputype = %d, should be %d",
257                 om->name, header->cputype, CPU_TYPE_I386);
258         return;
259     }
260     if (header->filetype != MH_OBJECT)
261     {
262         error("Mach-O object module %s has file type = %d, should be %d",
263                 om->name, header->filetype, MH_OBJECT);
264         return;
265     }
266     if (buflen < sizeof(struct mach_header) + header->sizeofcmds)
267     {   reason = 2;
268         goto Lcorrupt;
269     }
270
271     struct segment_command *segment_commands = NULL;
272     struct symtab_command *symtab_commands = NULL;
273     struct dysymtab_command *dysymtab_commands = NULL;
274
275     // Commands immediately follow mach_header
276     char *commands = (char *)buf + sizeof(struct mach_header);
277     for (int i = 0; i < header->ncmds; i++)
278     {   struct load_command *command = (struct load_command *)commands;
279         //printf("cmd = 0x%02x, cmdsize = %u\n", command->cmd, command->cmdsize);
280         switch (command->cmd)
281         {
282             case LC_SEGMENT:
283                 segment_commands = (struct segment_command *)command;
284                 break;
285             case LC_SYMTAB:
286                 symtab_commands = (struct symtab_command *)command;
287                 break;
288             case LC_DYSYMTAB:
289                 dysymtab_commands = (struct dysymtab_command *)command;
290                 break;
291         }
292         commands += command->cmdsize;
293     }
294
295     if (symtab_commands)
296     {
297         // Get pointer to string table
298         char *strtab = (char *)buf + symtab_commands->stroff;
299         if (buflen < symtab_commands->stroff + symtab_commands->strsize)
300         {   reason = 3;
301             goto Lcorrupt;
302         }
303
304         // Get pointer to symbol table
305         struct nlist *symtab = (struct nlist *)((char *)buf + symtab_commands->symoff);
306         if (buflen < symtab_commands->symoff + symtab_commands->nsyms * sizeof(struct nlist))
307         {   reason = 4;
308             goto Lcorrupt;
309         }
310
311         // For each symbol
312         for (int i = 0; i < symtab_commands->nsyms; i++)
313         {   struct nlist *s = symtab + i;
314             char *name = strtab + s->n_un.n_strx;
315
316             if (s->n_type & N_STAB)
317                 // values in /usr/include/mach-o/stab.h
318                 ; //printf(" N_STAB");
319             else
320             {
321                 if (s->n_type & N_PEXT)
322                     ;
323                 if (s->n_type & N_EXT)
324                     ;
325                 switch (s->n_type & N_TYPE)
326                 {
327                     case N_UNDF:
328                         break;
329                     case N_ABS:
330                         break;
331                     case N_SECT:
332                         if (s->n_type & N_EXT /*&& !(s->n_desc & N_REF_TO_WEAK)*/)
333                             addSymbol(om, name, 1);
334                         break;
335                     case N_PBUD:
336                         break;
337                     case N_INDR:
338                         break;
339                 }
340             }
341         }
342     }
343 }
344
345 /***************************************
346  * Add object module or library to the library.
347  * Examine the buffer to see which it is.
348  * If the buffer is NULL, use module_name as the file name
349  * and load the file.
350  */
351
352 void Library::addObject(const char *module_name, void *buf, size_t buflen)
353 {
354     if (!module_name)
355         module_name = "";
356 #if LOG
357     printf("Library::addObject(%s)\n", module_name);
358 #endif
359     int fromfile = 0;
360     if (!buf)
361     {   assert(module_name[0]);
362         FileName f((char *)module_name, 0);
363         File file(&f);
364         file.readv();
365         buf = file.buffer;
366         buflen = file.len;
367         file.ref = 1;
368         fromfile = 1;
369     }
370     int reason = 0;
371
372     if (buflen < 16)
373     {
374 #if LOG
375         printf("buf = %p, buflen = %d\n", buf, buflen);
376 #endif
377       Lcorrupt:
378         error("corrupt object module %s %d", module_name, reason);
379         return;
380     }
381
382     if (memcmp(buf, "!<arch>\n", 8) == 0)
383     {   /* Library file.
384          * Pull each object module out of the library and add it
385          * to the object module array.
386          */
387 #if LOG
388         printf("archive, buf = %p, buflen = %d\n", buf, buflen);
389 #endif
390         unsigned offset = 8;
391         char *symtab = NULL;
392         unsigned symtab_size = 0;
393         char *filenametab = NULL;
394         unsigned filenametab_size = 0;
395         unsigned mstart = objmodules.dim;
396         while (offset < buflen)
397         {
398             if (offset + sizeof(Header) >= buflen)
399             {   reason = 1;
400                 goto Lcorrupt;
401             }
402             Header *header = (Header *)((unsigned char *)buf + offset);
403             offset += sizeof(Header);
404             char *endptr = NULL;
405             unsigned long size = strtoul(header->file_size, &endptr, 10);
406             if (endptr >= &header->file_size[10] || *endptr != ' ')
407             {   reason = 2;
408                 goto Lcorrupt;
409             }
410             if (offset + size > buflen)
411             {   reason = 3;
412                 goto Lcorrupt;
413             }
414
415             if (memcmp(header->object_name, "__.SYMDEF       ", 16) == 0 ||
416                 memcmp(header->object_name, "__.SYMDEF SORTED", 16) == 0)
417             {
418                 /* Instead of rescanning the object modules we pull from a
419                  * library, just use the already created symbol table.
420                  */
421                 if (symtab)
422                 {   reason = 4;
423                     goto Lcorrupt;
424                 }
425                 symtab = (char *)buf + offset;
426                 symtab_size = size;
427                 if (size < 4)
428                 {   reason = 5;
429                     goto Lcorrupt;
430                 }
431             }
432             else
433             {
434                 ObjModule *om = new ObjModule();
435                 om->base = (unsigned char *)buf + offset - sizeof(Header);
436                 om->length = size + sizeof(Header);
437                 om->offset = 0;
438                 om->name = (char *)(om->base + sizeof(Header));
439                 om->file_time = strtoul(header->file_time, &endptr, 10);
440                 om->user_id   = strtoul(header->user_id, &endptr, 10);
441                 om->group_id  = strtoul(header->group_id, &endptr, 10);
442                 om->file_mode = strtoul(header->file_mode, &endptr, 8);
443                 om->scan = 0;
444                 objmodules.push(om);
445             }
446             offset += (size + 1) & ~1;
447         }
448         if (offset != buflen)
449         {   reason = 9;
450             goto Lcorrupt;
451         }
452
453         /* Scan the library's symbol table, and insert it into our own.
454          * We use this instead of rescanning the object module, because
455          * the library's creator may have a different idea of what symbols
456          * go into the symbol table than we do.
457          * This is also probably faster.
458          */
459         unsigned nsymbols = sgetl(symtab) / 8;
460         char *s = symtab + 4 + nsymbols * 8 + 4;
461         if (4 + nsymbols * 8 + 4 > symtab_size)
462         {   reason = 10;
463             goto Lcorrupt;
464         }
465         for (unsigned i = 0; i < nsymbols; i++)
466         {
467             unsigned soff = sgetl(symtab + 4 + i * 8);
468             char *name = s + soff;
469             //printf("soff = x%x name = %s\n", soff, name);
470             if (s + strlen(name) + 1 - symtab > symtab_size)
471             {   reason = 11;
472                 goto Lcorrupt;
473             }
474             unsigned moff = sgetl(symtab + 4 + i * 8 + 4);
475             //printf("symtab[%d] moff = x%x  x%x, name = %s\n", i, moff, moff + sizeof(Header), name);
476             for (unsigned m = mstart; 1; m++)
477             {   if (m == objmodules.dim)
478                 {   reason = 12;
479                     goto Lcorrupt;              // didn't find it
480                 }
481                 ObjModule *om = (ObjModule *)objmodules.data[m];
482                 //printf("\tom offset = x%x\n", (char *)om->base - (char *)buf);
483                 if (moff == (char *)om->base - (char *)buf)
484                 {
485                     addSymbol(om, name, 1);
486 //                  if (mstart == m)
487 //                      mstart++;
488                     break;
489                 }
490             }
491         }
492
493         return;
494     }
495
496     if (memcmp(buf, &mach_signature, sizeof(mach_signature)) != 0)
497     {   reason = 13;
498         goto Lcorrupt;
499     }
500
501     /* It's an Mach-O object module
502      */
503     ObjModule *om = new ObjModule();
504     om->base = (unsigned char *)buf;
505     om->length = buflen;
506     om->offset = 0;
507     om->name = FileName::name(module_name);     // remove path, but not extension
508     om->scan = 1;
509     if (fromfile)
510     {   struct stat statbuf;
511         int i = stat(module_name, &statbuf);
512         if (i == -1)            // error, errno is set
513         {   reason = 14;
514             goto Lcorrupt;
515         }
516         om->file_time = statbuf.st_ctime;
517         om->user_id   = statbuf.st_uid;
518         om->group_id  = statbuf.st_gid;
519         om->file_mode = statbuf.st_mode;
520     }
521     else
522     {   /* Mock things up for the object module file that never was
523          * actually written out.
524          */
525         static uid_t uid;
526         static gid_t gid;
527         static int init;
528         if (!init)
529         {   init = 1;
530             uid = getuid();
531             gid = getgid();
532         }
533         time(&om->file_time);
534         om->user_id = uid;
535         om->group_id = gid;
536         om->file_mode = 0100644;
537     }
538     objmodules.push(om);
539 }
540
541
542 /*****************************************************************************/
543 /*****************************************************************************/
544
545 /**********************************************
546  * Create and write library to libbuf.
547  * The library consists of:
548  *      !<arch>\n
549  *      header
550  *      dictionary
551  *      object modules...
552  */
553
554 void Library::WriteLibToBuffer(OutBuffer *libbuf)
555 {
556 #if LOG
557     printf("Library::WriteLibToBuffer()\n");
558 #endif
559     static char pad[7] = { 0x0A,0x0A,0x0A,0x0A,0x0A,0x0A,0x0A, };
560
561     /************* Scan Object Modules for Symbols ******************/
562
563     for (int i = 0; i < objmodules.dim; i++)
564     {   ObjModule *om = (ObjModule *)objmodules.data[i];
565         if (om->scan)
566         {
567             scanObjModule(om);
568         }
569     }
570
571     /************* Determine module offsets ******************/
572
573     unsigned moffset = 8 + sizeof(Header) + 4 + 4;
574
575     for (int i = 0; i < objsymbols.dim; i++)
576     {   ObjSymbol *os = (ObjSymbol *)objsymbols.data[i];
577
578         moffset += 8 + strlen(os->name) + 1;
579     }
580     moffset = (moffset + 3) & ~3;
581 //    if (moffset & 4)
582 //      moffset += 4;
583     unsigned hoffset = moffset;
584
585 #if LOG
586     printf("\tmoffset = x%x\n", moffset);
587 #endif
588
589     for (int i = 0; i < objmodules.dim; i++)
590     {   ObjModule *om = (ObjModule *)objmodules.data[i];
591
592         moffset += moffset & 1;
593         om->offset = moffset;
594         if (om->scan)
595         {
596             size_t slen = strlen(om->name);
597             int nzeros = 8 - ((slen + 4) & 7);
598             if (nzeros < 4)
599                 nzeros += 8;            // emulate mysterious behavior of ar
600             int filesize = om->length;
601             filesize = (filesize + 7) & ~7;
602             moffset += sizeof(Header) + slen + nzeros + filesize;
603         }
604         else
605         {
606             moffset += om->length;
607         }
608     }
609
610     libbuf->reserve(moffset);
611
612     /************* Write the library ******************/
613     libbuf->write("!<arch>\n", 8);
614
615     ObjModule om;
616     om.base = NULL;
617     om.length = hoffset - (8 + sizeof(Header));
618     om.offset = 8;
619     om.name = (char*)"";
620     ::time(&om.file_time);
621     om.user_id = getuid();
622     om.group_id = getgid();
623     om.file_mode = 0100644;
624
625     Header h;
626     OmToHeader(&h, &om);
627     memcpy(h.object_name, "__.SYMDEF", 9);
628     int len = sprintf(h.file_size, "%u", om.length);
629     assert(len <= 10);
630     memset(h.file_size + len, ' ', 10 - len);
631
632     libbuf->write(&h, sizeof(h));
633
634     char buf[4];
635
636     sputl(objsymbols.dim * 8, buf);
637     libbuf->write(buf, 4);
638
639     int stringoff = 0;
640     for (int i = 0; i < objsymbols.dim; i++)
641     {   ObjSymbol *os = (ObjSymbol *)objsymbols.data[i];
642
643         sputl(stringoff, buf);
644         libbuf->write(buf, 4);
645
646         sputl(os->om->offset, buf);
647         libbuf->write(buf, 4);
648
649         stringoff += strlen(os->name) + 1;
650     }
651
652     sputl(stringoff, buf);
653     libbuf->write(buf, 4);
654
655     for (int i = 0; i < objsymbols.dim; i++)
656     {   ObjSymbol *os = (ObjSymbol *)objsymbols.data[i];
657
658         libbuf->writestring(os->name);
659         libbuf->writeByte(0);
660     }
661     while (libbuf->offset & 3)
662         libbuf->writeByte(0);
663
664 //    if (libbuf->offset & 4)
665 //      libbuf->write(pad, 4);
666
667 #if LOG
668     printf("\tlibbuf->moffset = x%x\n", libbuf->offset);
669 #endif
670     assert(libbuf->offset == hoffset);
671
672     /* Write out each of the object modules
673      */
674     for (int i = 0; i < objmodules.dim; i++)
675     {   ObjModule *om = (ObjModule *)objmodules.data[i];
676
677         if (libbuf->offset & 1)
678             libbuf->writeByte('\n');    // module alignment
679
680         assert(libbuf->offset == om->offset);
681
682         if (om->scan)
683         {
684             OmToHeader(&h, om);
685             libbuf->write(&h, sizeof(h));       // module header
686
687             size_t len = strlen(om->name);
688             libbuf->write(om->name, len);
689
690             int nzeros = 8 - ((len + 4) & 7);
691             if (nzeros < 4)
692                 nzeros += 8;            // emulate mysterious behavior of ar
693             libbuf->fill0(nzeros);
694
695             libbuf->write(om->base, om->length);        // module contents
696
697             // obj modules are padded out to 8 bytes in length with 0x0A
698             int filealign = om->length & 7;
699             if (filealign)
700             {
701                 libbuf->write(pad, 8 - filealign);
702             }
703         }
704         else
705         {
706             libbuf->write(om->base, om->length);        // module contents
707         }
708     }
709
710 #if LOG
711     printf("moffset = x%x, libbuf->offset = x%x\n", moffset, libbuf->offset);
712 #endif
713     assert(libbuf->offset == moffset);
714 }
Note: See TracBrowser for help on using the browser.