2020#include  " codegen/proxy/csv_scanner_proxy.h" 
2121#include  " codegen/proxy/runtime_functions_proxy.h" 
2222#include  " codegen/type/sql_type.h" 
23+ #include  " codegen/vector.h" 
2324#include  " planner/csv_scan_plan.h" 
2425
2526namespace  peloton  {
@@ -28,23 +29,25 @@ namespace codegen {
2829CSVScanTranslator::CSVScanTranslator (const  planner::CSVScanPlan &scan,
2930                                     CompilationContext &context,
3031                                     Pipeline &pipeline)
31-     : OperatorTranslator(context, pipeline), scan_(scan ) {
32+     : OperatorTranslator(scan, context, pipeline ) {
3233  //  Register the CSV scanner instance
33-   auto  &runtime_state  = context.GetRuntimeState ();
34-   scanner_id_ = runtime_state .RegisterState (
34+   auto  &query_state  = context.GetQueryState ();
35+   scanner_id_ = query_state .RegisterState (
3536      " csvScanner" CSVScannerProxy::GetType (GetCodeGen ()));
3637
3738  //  Load information about the attributes output by the scan plan
38-   scan_ .GetAttributes (output_attributes_);
39+   scan .GetAttributes (output_attributes_);
3940}
4041
41- void  CSVScanTranslator::InitializeState  () {
42+ void  CSVScanTranslator::InitializeQueryState  () {
4243  auto  &codegen = GetCodeGen ();
4344
45+   auto  &scan = GetPlanAs<planner::CSVScanPlan>();
46+ 
4447  //  Arguments
4548  llvm::Value *scanner_ptr = LoadStatePtr (scanner_id_);
46-   llvm::Value *exec_ctx_ptr = GetCompilationContext (). GetExecutorContextPtr ();
47-   llvm::Value *file_path = codegen.ConstString (scan_ .GetFileName (), " filePath" 
49+   llvm::Value *exec_ctx_ptr = GetExecutorContextPtr ();
50+   llvm::Value *file_path = codegen.ConstString (scan .GetFileName (), " filePath" 
4851
4952  auto  num_cols = static_cast <uint32_t >(output_attributes_.size ());
5053
@@ -71,20 +74,24 @@ void CSVScanTranslator::InitializeState() {
7174  //  Cast the runtime type to an opaque void*. This is because we're calling
7275  //  into pre-compiled C++ that doesn't know what the dynamically generated
7376  //  RuntimeState* looks like.
74-   llvm::Value *runtime_state_ptr  = codegen->CreatePointerCast (
77+   llvm::Value *query_state_ptr  = codegen->CreatePointerCast (
7578      codegen.GetState (), codegen.VoidType ()->getPointerTo ());
7679
7780  //  Call CSVScanner::Init()
7881  codegen.Call (CSVScannerProxy::Init,
7982               {scanner_ptr, exec_ctx_ptr, file_path, output_col_types,
80-                 codegen.Const32 (num_cols), consumer_func, runtime_state_ptr ,
81-                 codegen.Const8 (scan_ .GetDelimiterChar ()),
82-                 codegen.Const8 (scan_ .GetQuoteChar ()),
83-                 codegen.Const8 (scan_ .GetEscapeChar ())});
83+                 codegen.Const32 (num_cols), consumer_func, query_state_ptr ,
84+                 codegen.Const8 (scan .GetDelimiterChar ()),
85+                 codegen.Const8 (scan .GetQuoteChar ()),
86+                 codegen.Const8 (scan .GetEscapeChar ())});
8487}
8588
8689namespace  {
8790
91+ /* *
92+  * This is a deferred column access class configured to load the contents of a 
93+  * given column. 
94+  */  
8895class  CSVColumnAccess  : public  RowBatch ::AttributeAccess {
8996 public: 
9097  CSVColumnAccess (const  planner::AttributeInfo *ai, llvm::Value *csv_columns,
@@ -94,6 +101,12 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
94101        null_str_(std::move(null_str)),
95102        runtime_null_(runtime_null_str) {}
96103
104+   // ////////////////////////////////////////////////////////////////////////////
105+   // /
106+   // / Accessors
107+   // /
108+   // ////////////////////////////////////////////////////////////////////////////
109+ 
97110  llvm::Value *Columns () const  { return  csv_columns_; }
98111
99112  uint32_t  ColumnIndex () const  { return  ai_->attribute_id ; }
@@ -102,6 +115,25 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
102115
103116  const  type::SqlType &SqlType () const  { return  ai_->type .GetSqlType (); }
104117
118+   // ////////////////////////////////////////////////////////////////////////////
119+   // /
120+   // / Logic
121+   // /
122+   // ////////////////////////////////////////////////////////////////////////////
123+ 
124+   /* *
125+    * Check if a column's value is considered NULL. Given a pointer to the 
126+    * column's string value, and the length of the string, this function will 
127+    * check if the column's value is determined to be NULL. This is done by 
128+    * comparing the column's contents with the NULL string configured in the 
129+    * CSV scan plan (i.e., provided by the user). 
130+    * 
131+    * @param codegen The codegen instance 
132+    * @param data_ptr A pointer to the column's string value 
133+    * @param data_len The length of the column's string value 
134+    * @return True if the column is equivalent to the NULL string. False 
135+    * otherwise. 
136+    */  
105137  llvm::Value *IsNull (CodeGen &codegen, llvm::Value *data_ptr,
106138                      llvm::Value *data_len) const  {
107139    uint32_t  null_str_len = static_cast <uint32_t >(null_str_.length ());
@@ -127,6 +159,16 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
127159    return  check_null.BuildPHI (cmp_res, codegen.ConstBool (false ));
128160  }
129161
162+   /* *
163+    * Load the value of the given column with the given type, ignoring a null 
164+    * check. 
165+    * 
166+    * @param codegen The codegen instance 
167+    * @param type The SQL type of the column 
168+    * @param data_ptr A pointer to the column's string representation 
169+    * @param data_len The length of the column's string representation 
170+    * @return The parsed value 
171+    */  
130172  Value LoadValueIgnoreNull (CodeGen &codegen, llvm::Value *type,
131173                            llvm::Value *data_ptr,
132174                            llvm::Value *data_len) const  {
@@ -144,6 +186,15 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
144186    }
145187  }
146188
189+   /* *
190+    * Access this column in the given row. In reality, this function pulls out 
191+    * the column information from the CSVScanner state and loads/parses the 
192+    * column's value. 
193+    * 
194+    * @param codegen The codegen instance 
195+    * @param row The row. This isn't used. 
196+    * @return The value of the column 
197+    */  
147198  Value Access (CodeGen &codegen, UNUSED_ATTRIBUTE RowBatch::Row &row) override  {
148199    //  Load the type, data pointer and length values for the column
149200    auto  *type = codegen->CreateConstInBoundsGEP2_32 (
@@ -178,22 +229,31 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
178229  }
179230
180231 private: 
232+   //  Information about the attribute
181233  const  planner::AttributeInfo *ai_;
234+ 
235+   //  A pointer to the array of columns
182236  llvm::Value *csv_columns_;
237+ 
238+   //  The NULL string configured for the CSV scan
183239  const  std::string null_str_;
240+ 
241+   //  The runtime NULL string (a constant in LLVM)
184242  llvm::Value *runtime_null_;
185243};
186244
187245}  //  namespace
188246
247+ //  We define the callback/consumer function for CSV parsing here
189248void  CSVScanTranslator::DefineAuxiliaryFunctions () {
190249  CodeGen &codegen = GetCodeGen ();
191250  CompilationContext &cc = GetCompilationContext ();
192251
252+   auto  &scan = GetPlanAs<planner::CSVScanPlan>();
253+ 
193254  //  Define consumer function here
194255  std::vector<FunctionDeclaration::ArgumentInfo> arg_types = {
195-       {" runtimeState" 
196-        cc.GetRuntimeState ().FinalizeType (codegen)->getPointerTo ()}};
256+       {" queryState" GetQueryState ().GetType ()->getPointerTo ()}};
197257  FunctionDeclaration decl{codegen.GetCodeContext (), " consumer" 
198258                           FunctionDeclaration::Visibility::Internal,
199259                           codegen.VoidType (), arg_types};
@@ -209,13 +269,13 @@ void CSVScanTranslator::DefineAuxiliaryFunctions() {
209269    llvm::Value *cols = codegen->CreateLoad (codegen->CreateConstInBoundsGEP2_32 (
210270        CSVScannerProxy::GetType (codegen), LoadStatePtr (scanner_id_), 0 , 1 ));
211271
212-     llvm::Value *null_str = codegen.ConstString (scan_ .GetNullString (), " null" 
272+     llvm::Value *null_str = codegen.ConstString (scan .GetNullString (), " null" 
213273
214274    //  Add accessors for all columns into the row batch
215275    std::vector<CSVColumnAccess> column_accessors;
216276    for  (uint32_t  i = 0 ; i < output_attributes_.size (); i++) {
217277      column_accessors.emplace_back (output_attributes_[i], cols,
218-                                     scan_ .GetNullString (), null_str);
278+                                     scan .GetNullString (), null_str);
219279    }
220280    for  (uint32_t  i = 0 ; i < output_attributes_.size (); i++) {
221281      one.AddAttribute (output_attributes_[i], &column_accessors[i]);
@@ -238,17 +298,10 @@ void CSVScanTranslator::Produce() const {
238298  GetCodeGen ().Call (CSVScannerProxy::Produce, {scanner_ptr});
239299}
240300
241- void  CSVScanTranslator::TearDownState  () {
301+ void  CSVScanTranslator::TearDownQueryState  () {
242302  auto  *scanner_ptr = LoadStatePtr (scanner_id_);
243303  GetCodeGen ().Call (CSVScannerProxy::Destroy, {scanner_ptr});
244304}
245305
246- std::string CSVScanTranslator::GetName () const  {
247-   return  StringUtil::Format (
248-       " CSVScan(file: '%s', delimiter: '%c', quote: '%c', escape: '%c')" 
249-       scan_.GetFileName ().c_str (), scan_.GetDelimiterChar (),
250-       scan_.GetQuoteChar (), scan_.GetEscapeChar ());
251- }
252- 
253306}  //  namespace codegen
254- }  //  namespace peloton
307+ }  //  namespace peloton
0 commit comments