当前位置: 首页 > news >正文

gcc 源码阅读--C语言预处理

在 C/C++ 语言中,编译器的第一步处理就是预处理,

比如处理 #include、#ifdef、#endif、#if 等预处理指令。

GCC 把这部分的实现代码放在一个单独的目录 libcpp 中。

这里面有几个重要结构需要了解:

/* A single preprocessing token: its source location, its type, its
   flags, and a union holding the type-specific value.  The GTY
   annotations give each union member a tag and name the discriminator
   expression (cpp_token_val_index).  */
struct GTY(()) cpp_token {

  /* Location of first char of token, together with range of full token.  */
location_t src_loc;
// Records the source location of the first character of the token.


ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT;  /* token type */
// The token's type, e.g. CPP_NUMBER.
unsigned short flags;     /* flags - see above */

  union cpp_token_u
{
// The value node finally built for the token; different kinds of token
// use different structs (one union member per kind).
/* An identifier.  */
struct cpp_identifier GTY ((tag ("CPP_TOKEN_FLD_NODE"))) node;

    /* Inherit padding from this token.  */
cpp_token * GTY ((tag ("CPP_TOKEN_FLD_SOURCE"))) source;

    /* A string, or number.  */
struct cpp_string GTY ((tag ("CPP_TOKEN_FLD_STR"))) str;

    /* Argument no. (and original spelling) for a CPP_MACRO_ARG.  */
struct cpp_macro_arg GTY ((tag ("CPP_TOKEN_FLD_ARG_NO"))) macro_arg;

    /* Original token no. for a CPP_PASTE (from a sequence of
consecutive paste tokens in a macro expansion).  */
unsigned int GTY ((tag ("CPP_TOKEN_FLD_TOKEN_NO"))) token_no;

    /* Caller-supplied identifier for a CPP_PRAGMA.  */
unsigned int GTY ((tag ("CPP_TOKEN_FLD_PRAGMA"))) pragma;
} GTY ((desc ("cpp_token_val_index (&%1)"))) val;
};
/* A token as seen by the C parser: the preprocessor token type plus
   classification (identifier kind, keyword, pragma kind), its location,
   an associated tree value and flags.  */
struct GTY (()) c_token {
/* The kind of token.  */
ENUM_BITFIELD (cpp_ttype) type : 8;
/* If this token is a CPP_NAME, this value indicates whether also
declared as some kind of type.  Otherwise, it is C_ID_NONE.  */
ENUM_BITFIELD (c_id_kind) id_kind : 8;// kind of identifier (see the comment above)
/* If this token is a keyword, this value indicates which keyword.
Otherwise, this value is RID_MAX.  */
ENUM_BITFIELD (rid) keyword : 8;
/* If this token is a CPP_PRAGMA, this indicates the pragma that
was seen.  Otherwise it is PRAGMA_NONE.  */
ENUM_BITFIELD (pragma_kind) pragma_kind : 8;
/* The location at which this token was found.  */
location_t location;
/* The value associated with this token, if any.  */
tree value;
/* Token flags.  */
unsigned char flags;

  /* Return the source range for this token, looked up from its
     location in the global line_table.  */
  source_range get_range () const
{
return get_range_from_loc (line_table, location);
}

  /* Return the m_finish location of this token's source range.  */
  location_t get_finish () const
{
return get_range ().m_finish;
}
};

/* Excerpt of the C parser state; only the token look-ahead members are
   shown, the rest is elided by the article.  */
struct c_parser {

  c_token * tokens;    /* Address of the c_token currently being processed.  Article note: apart from during initialization this should point at tokens_buf[0] -- TODO confirm against the GCC source.  */

  c_token tokens_buf[4];  /* Look-ahead buffer of c_tokens.  Article note: GCC's parser never looks ahead more than 4 tokens.  */

  unsigned int tokens_avail;    /* Number of look-ahead tokens currently available in tokens_buf.  */
。。。。。。
}

struct cpp_reader
{
/* Top of buffer stack.  */
cpp_buffer *buffer;

  /* Overlaid buffer (can be different after processing #include).  */
cpp_buffer *overlaid_buffer;

  /* Lexer state.  */
struct lexer_state state;

  /* Source line tracking.  */
class line_maps *line_table;

  /* The line of the '#' of the current directive.  */
location_t directive_line;

  /* Memory buffers.  */
_cpp_buff *a_buff;        /* Aligned permanent storage.  */
_cpp_buff *u_buff;        /* Unaligned permanent storage.  */
_cpp_buff *free_buffs;    /* Free buffer chain.  */

  /* Context stack.  */
struct cpp_context base_context;
struct cpp_context *context;

  /* If in_directive, the directive if known.  */
const struct directive *directive;

  /* Token generated while handling a directive, if any. */
cpp_token directive_result;

  /* When expanding a macro at top-level, this is the location of the
macro invocation.  */
location_t invocation_location;

  /* This is the node representing the macro being expanded at
top-level.  The value of this data member is valid iff
cpp_in_macro_expansion_p() returns TRUE.  */
cpp_hashnode *top_most_macro_node;

  /* Nonzero if we are about to expand a macro.  Note that if we are
really expanding a macro, the function macro_of_context returns
the macro being expanded and this flag is set to false.  Client
code should use the function cpp_in_macro_expansion_p to know if we
are either about to expand a macro, or are actually expanding
one.  */
bool about_to_expand_macro_p;

  /* True if the preprocessor should diagnose CPP_DOT or CPP_COLON
tokens as the first ones coming from macro expansion.  */
bool diagnose_dot_colon_from_macro_p;

  /* Search paths for include files.  */
struct cpp_dir *quote_include;    /* "" */
struct cpp_dir *bracket_include;    /* <> */
struct cpp_dir no_search_path;    /* No path.  */
struct cpp_dir *embed_include;    /* #embed <> */

  /* Chain of all hashed _cpp_file instances.  */
struct _cpp_file *all_files;

  struct _cpp_file *main_file;

  /* File and directory hash table.  */
struct htab *file_hash;
struct htab *dir_hash;
struct file_hash_entry_pool *file_hash_entries;

  /* Negative path lookup hash table.  */
struct htab *nonexistent_file_hash;
struct obstack nonexistent_file_ob;

  /* Nonzero means don't look for #include "foo" the source-file
directory.  */
bool quote_ignores_source_dir;

  /* Nonzero if any file has contained #pragma once or #import has
been used.  */
bool seen_once_only;

  /* Multiple include optimization and -Wheader-guard warning.  */
const cpp_hashnode *mi_cmacro;
const cpp_hashnode *mi_ind_cmacro;
const cpp_hashnode *mi_def_cmacro;
location_t mi_loc, mi_def_loc;
bool mi_valid;

  /* Lexing.  */
cpp_token *cur_token;
tokenrun base_run, *cur_run;
unsigned int lookaheads;

  /* Nonzero prevents the lexer from re-using the token runs.  */
unsigned int keep_tokens;

  /* Buffer to hold macro definition string.  */
unsigned char *macro_buffer;
unsigned int macro_buffer_len;

  /* Descriptor for converting from the source character set to the
execution character set.  */
struct cset_converter narrow_cset_desc;

  /* Descriptor for converting from the source character set to the
UTF-8 execution character set.  */
struct cset_converter utf8_cset_desc;

  /* Descriptor for converting from the source character set to the
UTF-16 execution character set.  */
struct cset_converter char16_cset_desc;

  /* Descriptor for converting from the source character set to the
UTF-32 execution character set.  */
struct cset_converter char32_cset_desc;

  /* Descriptor for converting from the source character set to the
wide execution character set.  */
struct cset_converter wide_cset_desc;

  /* Date and time text.  Calculated together if either is requested.  */
const unsigned char *date;
const unsigned char *time;

  /* Time stamp, set idempotently lazily.  */
time_t time_stamp;
int time_stamp_kind; /* Or errno.  */

  /* A token forcing paste avoidance, and one demarking macro arguments.  */
cpp_token avoid_paste;
cpp_token endarg;

  /* Opaque handle to the dependencies of mkdeps.cc.  */
class mkdeps *deps;

  /* Obstack holding all macro hash nodes.  This never shrinks.
See identifiers.cc */
struct obstack hash_ob;

  /* Obstack holding buffer and conditional structures.  This is a
real stack.  See directives.cc.  */
struct obstack buffer_ob;

  /* Pragma table - dynamic, because a library user can add to the
list of recognized pragmas.  */
struct pragma_entry *pragmas;

  /* Call backs to cpplib client.  */
struct cpp_callbacks cb;

  /* Identifier hash table.  */
struct ht *hash_table;

  /* Identifier ancillary data hash table.  */
struct ht *extra_hash_table;

  /* Expression parser stack.  */
struct op *op_stack, *op_limit;

  /* User visible options.  */
struct cpp_options opts;

  /* Special nodes - identifiers with predefined significance to the
preprocessor.  */
struct spec_nodes spec_nodes;

  /* Whether cpplib owns the hashtable.  */
bool our_hashtable, our_extra_hashtable;

  /* Traditional preprocessing output buffer (a logical line).  */
struct
{
unsigned char *base;
unsigned char *limit;
unsigned char *cur;
location_t first_line;
} out;

  /* Used for buffer overlays by traditional.cc.  */
const unsigned char *saved_cur, *saved_rlimit, *saved_line_base;

  /* A saved list of the defined macros, for dependency checking
of precompiled headers.  */
struct cpp_savedstate *savedstate;

  /* Next value of __COUNTER__ macro. */
unsigned int counter;

  /* Table of comments, when state.save_comments is true.  */
cpp_comment_table comments;

  /* List of saved macros by push_macro.  */
struct def_pragma_macro *pushed_macros;

  /* If non-zero, the lexer will use this location for the next token
instead of getting a location from the linemap.  */
location_t forced_token_location;

  /* Location identifying the main source file -- intended to be line
zero of said file.  */
location_t main_loc;

  /* If non-zero, override diagnostic locations (other than DK_NOTE
diagnostics) to this one.  */
location_t diagnostic_override_loc;

  /* Returns true iff we should warn about UTF-8 bidirectional control
characters.  */
bool warn_bidi_p () const
{
return (CPP_OPTION (this, cpp_warn_bidirectional)
& (bidirectional_unpaired|bidirectional_any));
}
}


/* An identifier hash table for cpplib and the front ends.  */
struct ht
{
/* Identifiers are allocated from here.  */
struct obstack stack;  // handles memory allocation for this hash table

   /* Article notes: points to the first element of a hashnode[nslots]
      array, i.e. the hash buckets.  Each array element holds a pointer
      to one entry (hence each element is called a hashnode); the entry
      itself is an ht_identifier, which only describes a string's
      contents, length and hash.
      The bucket array grows automatically: in the lookup function
      ht_lookup_with_hash, once the table is more than 3/4 full it is
      expanded (reallocate, copy, free the old array).  */

  hashnode *entries;
/* Call back, allocate a node.  */

/* Article notes: the GCC source tree has two alloc_node functions, one
   in gcc/stringpool.cc and one in libcpp/identifiers.cc (.c in older
   releases); libcpp is the directory responsible for preprocessing and
   lexing.
   - cc1 has its own alloc_node and always uses the one in
     gcc/stringpool.cc.
   - Other programs that link libcpp.a and do not provide their own
     alloc_node use the one in libcpp/identifiers.cc by default.
   alloc_node allocates the memory for a node; the pointers stored in
   the hashnode[] array then point into that memory.  During a lookup
   (ht_lookup_with_hash), when a new element has to be inserted,
   alloc_node is called and its return value is recorded in hashnode[].
   Note: alloc_node may allocate a structure of any type, as long as it
   returns the ht_identifier embedded in that structure
   (gcc/stringpool.cc actually allocates a lang_identifier tree node).  */
hashnode (*alloc_node) (cpp_hash_table *);
/* Call back, allocate something that hangs off a node like a cpp_macro.  
NULL means use the usual allocator.  */
void * (*alloc_subobject) (size_t);

  unsigned int nslots;      // total number of pointers the table can hold
unsigned int nelements;   /* Number of live elements.  */

  /* Link to reader, if any.  For the benefit of cpplib.  */
struct cpp_reader *pfile;  // the associated cpp_reader (parse_in, per the article)

  /* Table usage statistics.  */
unsigned int searches;
unsigned int collisions;

  /* Should 'entries' be freed when it is no longer needed?  */
bool entries_owned;
};

相关的初始化与调用流程(调用链)如下:

toplev::main 
=>lang_hooks.init_options = c_common_init_options
=> parse_in = cpp_create_reader()                         //1) 这里主要是对全局变量parse_in的初始化
=> do_compile
=> process_options
=> lang_hooks.post_options = c_common_post_options
=> cpp_read_main_file (parse_in, in_fnames[0])        //2) 这里主要负责打开并读入编译单元文件
=> compile_file();
=> lang_hooks.parse_file = c_common_parse_file()
=> c_parse_file ();  

在上述调用链涉及的函数中,会初始化预处理模块需要用到的几个全局对象(ident_hash、line_table、parse_in 等):

init_stringpool (void)
{
/* Clean up if we're called more than once.
(We can't make this idempotent since identifiers contain state) */
if (ident_hash)
ht_destroy (ident_hash);
if (ident_hash_extra)
ht_destroy (ident_hash_extra);

  /* Create with 16K (2^14) entries.  */
ident_hash = ht_create (14);
ident_hash->alloc_node = alloc_node;
ident_hash->alloc_subobject = stringpool_ggc_alloc;

  /* Create with 64 (2^6) entries.  */
ident_hash_extra = ht_create (6);
ident_hash_extra->alloc_node = [] (cpp_hash_table *)
{
return HT_NODE (ggc_cleared_alloc<cpp_hashnode_extra> ());
};
ident_hash_extra->alloc_subobject = stringpool_ggc_alloc;
}
/* Excerpt (the article elides the rest of the body and the return
   type): the part of general_init that sets up the global line_table
   used for source-location tracking.  */
general_init (const char *argv0, bool init_signals, unique_argv original_argv)

{

.....

/* Allocate the line map set and initialize it, passing
   BUILTINS_LOCATION to linemap_init.  */
line_table = ggc_alloc<line_maps> ();
linemap_init (line_table, BUILTINS_LOCATION);
/* Install the reallocation / size-rounding hooks and the default
   number of range bits.  */
line_table->m_reallocator = realloc_for_line_map;
line_table->m_round_alloc_size = ggc_round_alloc_size;
line_table->default_range_bits = line_map_suggested_range_bits;

......

}

   

/* Excerpt (the article elides the rest of the body and the return
   type): the part of c_common_init_options that creates the global
   preprocessor reader parse_in and installs its diagnostic callback.  */
c_common_init_options (unsigned int decoded_options_count,
struct cl_decoded_option *decoded_options)
{
unsigned int i;
struct cpp_callbacks *cb;

  /* Construct the string_concat_db in memory obtained from ggc_alloc
     (placement new).  */
  g_string_concat_db
= new (ggc_alloc <string_concat_db> ()) string_concat_db ();

  /* Create the reader, choosing the language kind by front end
     (CLK_GNUCXX for C++, otherwise CLK_GNUC89) and handing it the
     identifier tables and line_table set up earlier.  */
  parse_in = cpp_create_reader (c_dialect_cxx () ? CLK_GNUCXX : CLK_GNUC89,
ident_hash, line_table, ident_hash_extra);

/* Route the reader's diagnostics through c_cpp_diagnostic.  */
cb = cpp_get_callbacks (parse_in);
cb->diagnostic = c_cpp_diagnostic;

.....

}

/* Post-switch processing.  Excerpt (the article elides most of the
   body): shows the call that reads the first input file,
   in_fnames[0], as the main file and stores the resulting name in
   both this_input_filename and *PFILENAME.  */
bool
c_common_post_options (const char **pfilename)  

{

...

 *pfilename = this_input_filename
= cpp_read_main_file (parse_in, in_fnames[0],
/* We'll inject preamble pieces if this is
not preprocessed.  */
!cpp_opts->preprocessed);

...

}

c_common_parse_file (void)
{
auto dumps = g->get_dumps ();
for (unsigned int i = 0;;)
{
c_finish_options ();
/* Open the dump file to use for the original dump output
here, to be used during parsing for the current file.  */
dumps->dump_start (TDI_original, &dump_flags);
pch_init ();
push_file_scope ();
c_parse_file ();
pop_file_scope ();
/* And end the main input file, if the debug writer wants it  */
if (debug_hooks->start_end_main_source_file)
(*debug_hooks->end_source_file) (0);
if (++i >= num_in_fnames)
break;
cpp_undef_all (parse_in);
cpp_clear_file_cache (parse_in);
this_input_filename
= cpp_read_main_file (parse_in, in_fnames[i]);

/* If an input file is missing, abandon further compilation.
cpplib has issued a diagnostic.  */
if (!this_input_filename)
break;
dumps->dump_finish (TDI_original);
}

  c_parse_final_cleanups ();
dumps->dump_finish (TDI_original);
}

 

/* Return the next token from PFILE that is not CPP_PADDING.  A CPP_EOF
   is only peeked at, never consumed, so the caller sees it again on the
   next call.  Unlike the usual token getter this does not do the
   optional streaming of preprocess_only mode, which makes it suitable
   for processing builtin expansions such as c_common_has_attribute.  */

static const cpp_token *
get_token_no_padding (cpp_reader *pfile)
{
  const cpp_token *tok;

  do
    {
      /* Look before consuming so that CPP_EOF stays in the stream.  */
      tok = cpp_peek_token (pfile, 0);
      if (tok->type == CPP_EOF)
	break;
      tok = cpp_get_token (pfile);
    }
  while (tok->type == CPP_PADDING);

  return tok;
}

http://www.lryc.cn/news/587319.html

相关文章:

  • 深度学习16(对抗生成网络:GAN+自动编码器)
  • 深入理解 Java JVM
  • Java: OracleHelper
  • MYSQL笔记2
  • 线性基学习笔记
  • 查看Linux服务器显卡使用情况的详细教程
  • 【UE教程/进阶】使用Slate
  • 【unitrix】 5.0 第二套类型级二进制数基本结构体(types2.rs)
  • SQL预编译:安全高效数据库操作的关键
  • 苍穹外卖Day3
  • markdown-it-mathjax3-pro —— 新一代 Markdown 数学公式渲染插件
  • vue的优缺点
  • 框架和库的区别
  • day16~17-系统负载高故障与磁盘管理
  • muduo概述
  • 电商系统未来三年趋势:体验升级、技术赋能与模式重构
  • ASP.NET Core 中的延迟注入:原理与实践
  • 【UE教程/进阶】UE中的指针与引用
  • 应用层协议和JSON的使用
  • gcc 源码阅读---程序入口
  • 面试150 从前序与中序遍历构造二叉树
  • python赤道上空的大气环流剖面图(纬向-高度剖面)
  • Node.js 聊天内容加密解密实战教程(含缓存密钥优化)
  • 【elementUI踩坑记录】解决 el-table 固定列 el-table__fixed 导致部分滚动条无法拖动的问题
  • QT控件命名简写
  • Burp suite的下载安装基础用法(密码喷洒,密码爆破)
  • Linux 系统——管理 MySQL
  • 超市管理系统
  • 问题记录:Fastjson序列化-空值字段处理
  • 数据结构 Map和Set