// Not logged in
// markdown.cc at [b50c6ced7f]
//
// File scripts/markdown.cc as of check-in [b50c6ced7f]



#include <stdio.h>
#include <stdlib.h>
#include "markdown.h"
  

  
// void process_markdown()
// {
//   MyTSchar& fileName;
//   
//   
//   Tcl_Obj* readObjPtr=Tcl_NewObj();
//   Tcl_Channel channel=Tcl_OpenFileChannel(stateM.ip_,fileName.v(),"r",0);
//   if(!channel){
//     return cu_print_error(stateM.ip_,"Could not open file '%s'",fileName.v());
//   }
//   Tcl_SetChannelOption(stateM.ip_,channel,"-encoding","utf-8");
//   Tcl_ReadChars(channel,readObjPtr,2000,0);
//   char* tbuf=(char*)Tcl_GetStringFromObj(readObjPtr,&length);
//   result=Tcl_Close(stateM.ip_,channel);
// 
//   Tcl_DecrRefCount(readObjPtr);
//   return 0;
// }

// Returns 1 when mychar is one of the characters listed in chars, else 0.
//
// mychar: character code to look up.
// chars:  NUL-terminated list of accepted characters.
//
// strchr() treats the terminating NUL of chars as part of the string, so a
// NUL byte would otherwise be reported as a member of every set; it is
// rejected explicitly.
inline int is_chars(int mychar,const char* chars)
{
  return mychar!=0 && strchr(chars,mychar)!=NULL;
}

// Checks whether the range [start,end) of buffer consists only of
// repetitions of the character at start (optionally mixed with blanks).
//
// allow_spaces: when 0, any space or tab inside the range makes the check fail.
// min_chars:    minimum number of repetitions required (the character at
//               start counts as the first one).
// Returns 1 when the range qualifies, 0 otherwise.
inline int is_same_char_line(MyTSchar& buffer,int start,int end,
  int allow_spaces,int min_chars)
{
  int count=1;
  int pos=start+1;
  while(pos<end){
    const int blank=is_chars(buffer[pos]," \t");
    if(blank && !allow_spaces) return 0;
    if(!blank){
      // Any character other than the leading one disqualifies the line.
      if(buffer[pos]!=buffer[start]) return 0;
      count++;
    }
    pos++;
  }
  return (count>=min_chars) ? 1 : 0;
}

// Counts how many times the character at start is repeated at the beginning
// of the range [start,end) of buffer.
//
// allow_spaces: when non-zero, spaces and tabs are skipped without ending
//               the run; when 0, the first blank ends it.
// Returns the run length; the character at start itself counts as 1.
inline int num_same_char_line_prefix(MyTSchar& buffer,int start,int end,
  int allow_spaces)
{
  int run=1;
  for(int pos=start+1;pos<end;pos++){
    if(is_chars(buffer[pos]," \t")){
      if(!allow_spaces) break;
      continue;
    }
    if(buffer[pos]!=buffer[start]) break;
    run++;
  }
  return run;
}

// Scans a bracketed link label beginning at start.
//
// Blanks are skipped, a backslash consumes the character after it, a second
// '[' or a line break aborts the scan.
// Returns the index of the closing ']' when at least one label character was
// seen before it, 0 otherwise.
inline int link_label_num(MyTSchar& buffer,int start)
{
  int bracket_seen=0,label_len=0;
  int pos=start;
  while(pos<buffer.num()){
    const int ch=buffer[pos];
    if(is_chars(ch," \t")){
      pos++;
      continue;
    }
    if(is_chars(ch,"\r\n")) return 0;
    if(is_chars(ch,"[")){
      if(bracket_seen) return 0;
      bracket_seen=1;
    } else if(is_chars(ch,"]")){
      return (label_len==0) ? 0 : pos;
    } else {
      label_len++;
      // An escape also swallows the character that follows it.
      if(is_chars(ch,"\\")) pos++;
    }
    pos++;
  }
  return 0;
}

// Scans a link destination beginning at start.
//
// Leading blanks and at most one line break are skipped. A destination that
// opens with '<' ends just after the matching '>'; any other destination
// ends at the first blank or line break. Parentheses are counted and must be
// balanced where the destination ends. A backslash consumes the next
// character.
// Returns the index just past a '<...>' destination, the index of the
// terminating blank/line break for a bare destination, buffer.num() when a
// bare destination runs to the end of the buffer, or 0 on failure.
inline int link_destination_num(MyTSchar& buffer,int start)
{
  int started=0,line_breaks=0,in_angle=0,depth=0;
  const int total=buffer.num();

  for(int pos=start;pos<total;pos++){
    const int ch=buffer[pos];
    if(is_chars(ch," \t")){
      // A blank terminates a bare destination; inside '<...>' it is skipped.
      if(started && !in_angle) return depth ? 0 : pos;
    } else if(is_chars(ch,"\r\n")){
      if(started){
        if(in_angle) return 0;
        return depth ? 0 : pos;
      }
      line_breaks++;
      if(line_breaks>1) return 0;
    } else if(is_chars(ch,"<")){
      if(started){
        if(in_angle) return 0;
      } else {
        started=1;
        in_angle=1;
      }
    } else if(is_chars(ch,">")){
      if(in_angle) return depth ? 0 : pos+1;
      started=1;
    } else {
      if(is_chars(ch,"(")) depth++;
      else if(is_chars(ch,")")) depth--;
      else if(is_chars(ch,"\\")) pos++;
      started=1;
    }
  }
  return (started && !in_angle) ? total : 0;
}

// Measures the blanks between start and the end of the line.
//
// num_spaces: output, reset to 0 and then increased by 1 per space and by 4
//             per tab.
// Returns the index of the line break that ends the run of blanks,
// buffer.num() when the blanks run to the end of the buffer, or 0 when any
// other character is met first.
inline int spaces_num(MyTSchar& buffer,int start,int& num_spaces)
{
  num_spaces=0;
  const int total=buffer.num();
  int pos=start;
  while(pos<total){
    const int ch=buffer[pos];
    if(is_chars(ch," ")){
      num_spaces+=1;
    } else if(is_chars(ch,"\t")){
      num_spaces+=4;
    } else {
      return is_chars(ch,"\r\n") ? pos : 0;
    }
    pos++;
  }
  return total;
}
  

// Scans a link title beginning at start.
//
// The title is delimited by a pair of identical quotes (" or ') or by
// parentheses. Before the opening delimiter only blanks and at most one line
// break are accepted; inside the title at most one line break is accepted.
// A backslash inside the title consumes the next character.
// Returns the index just past the closing delimiter, or 0 when no valid
// title is found.
inline int link_title_num(MyTSchar& buffer,int start)
{
  int opened=0,breaks_before=0,breaks_inside=0;
  char delimiter=0;
  const int total=buffer.num();

  for(int pos=start;pos<total;pos++){
    const int ch=buffer[pos];
    if(is_chars(ch," \t")) continue;

    if(is_chars(ch,"\r\n")){
      if(opened){
        if(breaks_inside>0) return 0;
        breaks_inside++;
      } else {
        breaks_before++;
        if(breaks_before>1) return 0;
      }
      continue;
    }

    const int is_quote=is_chars(ch,"\"'");
    const int is_lparen=!is_quote && is_chars(ch,"(");

    if(!opened){
      // Only a quote or '(' may start the title.
      if(!is_quote && !is_lparen) return 0;
      delimiter=buffer[pos];
      opened=1;
      continue;
    }

    if(is_quote){
      if(buffer[pos]==delimiter) return pos+1;
    } else if(is_lparen){
      // A nested '(' is rejected in a parenthesized title.
      if(delimiter=='(') return 0;
    } else if(is_chars(ch,")")){
      if(delimiter=='(') return pos+1;
    } else if(is_chars(ch,"\\")){
      pos++;
    }
  }
  return 0;
}

// Scans a bracketed span beginning at start, with the same rules as
// link_label_num: blanks are skipped, a backslash consumes the next
// character, a second '[' or a line break aborts the scan.
// Returns the number of characters from start up to and including the
// closing ']' when at least one content character was seen, 0 otherwise.
inline int link_url_num(MyTSchar& buffer,int start)
{
  int seen_open=0,content=0;
  for(int pos=start;pos<buffer.num();pos++){
    const int ch=buffer[pos];
    if(is_chars(ch," \t")) continue;
    if(is_chars(ch,"\r\n")) return 0;

    if(is_chars(ch,"[")){
      if(seen_open) return 0;
      seen_open=1;
      continue;
    }
    if(is_chars(ch,"]")){
      if(content==0) return 0;
      return pos-start+1;
    }
    // Ordinary or escaped character: an escape also skips its target.
    if(is_chars(ch,"\\")) pos++;
    content++;
  }
  return 0;
}

// Classifies one input line and updates the block list accordingly.
//
// The line is described by members filled in by the caller:
//   SE_line_        raw line start / index of the line terminator
//   SE_lineNoBlank_ index of the first / last non-blank character
//   blanks_line_    leading / trailing blank width (a tab counts as 4)
// The checks run in a fixed order (block quote, fenced code, indented code,
// setext heading, thematic break, ATX heading, HTML block, link reference
// definition, paragraph); the first one that matches handles the line and
// returns.
//
// buffer:       the whole document text.
// parent_level: number of block quote markers already consumed on this line;
//               the function recurses with parent_level+1 after each marker.
//
// A link reference definition may consume more than one line; in that case
// pos_ is moved to the end of the consumed text.
void MKstate::create_append_paragraph(MyTSchar& buffer,int parent_level)
{
  int num_spaces;
  
//################################################################################
//    block_quote
//################################################################################
  
  // The block quote marker is '>' ('<' starts an HTML block, handled below).
  if(blanks_line_[0]<=3 && is_chars(buffer[SE_lineNoBlank_[0]],">")){
    if(this->give_type(parent_level)!=MK::block_quote){
      if(this->give_curr_type()==MK::fenced_code_block){
        // Inside a fenced code block the line is treated as code.
        goto no_block_quote;
      }
      while(open_blocks_.num()>parent_level) this->curr_close();
      this->append_block(MK::block_quote,SE_line_,SE_lineNoBlank_,
        blanks_line_);
    }
    // Strip the marker and one optional space, then re-measure the leading
    // blanks of what is left so the recursive call sees the quoted content.
    SE_line_[0]=SE_lineNoBlank_[0]+1;
    if(is_chars(buffer[SE_line_[0]]," ")) SE_line_[0]++;
    {
      // spaces_num() cannot be used here: it returns 0 as soon as content
      // follows the blanks, which would reset the first-non-blank index to
      // the start of the buffer.
      int p=SE_line_[0];
      blanks_line_[0]=0;
      while(p<SE_line_[1] && is_chars(buffer[p]," \t")){
        blanks_line_[0]+=(buffer[p]=='\t') ? 4 : 1;
        p++;
      }
      SE_lineNoBlank_[0]=p;
    }
    this->create_append_paragraph(buffer,parent_level+1);
    return;
  }
  
  if(parent_level>0 && this->give_type(parent_level)==MK::block_quote){
    while(open_blocks_.num()>parent_level) this->curr_close();
  }
  
  no_block_quote:;
  
//################################################################################
//    fenced_code_block
//################################################################################
  
  if(this->give_curr_type()==MK::fenced_code_block){
    if(blanks_line_[0]<=3 && is_chars(buffer[SE_lineNoBlank_[0]],"`~")){
      // A closing fence uses the same character as the opening fence and is
      // at least as long (block.level_ holds the opening fence length).
      MKblocks& block=blocks_[open_blocks_[end_MTS]];
      int valid=is_same_char_line(buffer,SE_lineNoBlank_[0],
        SE_lineNoBlank_[1],0,block.level_);
      if(buffer[SE_lineNoBlank_[0]]!=buffer[block.posNoBlank_[0]]) valid=0;
      if(valid){
        block.pos_[1]=SE_line_[1];
        block.posNoBlank_[1]=SE_lineNoBlank_[1];
        block.blanks_[1]=blanks_line_[1];
        this->curr_close();
        return;
      }
    }
    // Any other line extends the open fenced block.
    MKblocks& block=blocks_[open_blocks_[end_MTS]];
    block.pos_[1]=SE_line_[1];
    block.posNoBlank_[1]=SE_lineNoBlank_[1];
    block.blanks_[1]=blanks_line_[1];
    return;
  }
  
  if(blanks_line_[0]<=3 && is_chars(buffer[SE_lineNoBlank_[0]],"`~")){
    int level=num_same_char_line_prefix(buffer,SE_lineNoBlank_[0],SE_lineNoBlank_[1],0);
    if(level>=3){
      if(blockquote_levelN!=blockquote_level) this->close_all();
      else if(this->curr_is_open()) this->curr_close();
      MKblocks& block=this->append_block(MK::fenced_code_block,SE_line_,
        SE_lineNoBlank_,blanks_line_);
      block.level_=level;
      return;
    }
  }
      
//################################################################################
//    indented_code_block
//################################################################################
  
  if(this->give_curr_type()==MK::indented_code_block){
    if(blanks_line_[0]>=4){
      MKblocks& block=blocks_[open_blocks_[end_MTS]];
      block.pos_[1]=SE_line_[1];
      block.posNoBlank_[1]=SE_lineNoBlank_[1];
      block.blanks_[1]=blanks_line_[1];
      return;
    }
    this->curr_close();
  }
  if(!this->curr_is_open() && blanks_line_[0]>=4){
    this->append_block(MK::indented_code_block,SE_line_,SE_lineNoBlank_,
      blanks_line_);
    return;
  }
      
//################################################################################
//    setext_heading
//################################################################################
  
  // An underline of '=' or '-' turns the open paragraph into a heading.
  if(this->give_curr_type()==MK::paragraph &&
    blanks_line_[0]<=3 &&
    is_chars(buffer[SE_lineNoBlank_[0]],"-=")){
    int valid=is_same_char_line(buffer,SE_lineNoBlank_[0],SE_lineNoBlank_[1],0,1);
    if(valid){
      MKblocks& block=blocks_[open_blocks_[end_MTS]];
      block.mtype_=MK::setext_heading;
      if(buffer[SE_lineNoBlank_[0]]=='=') block.level_=1;
      else block.level_=2;
      this->curr_close();
      return;
    }
  }

//################################################################################
//    thematic_break
//################################################################################
  
  if(blanks_line_[0]<=3 && is_chars(buffer[SE_lineNoBlank_[0]],"-_*")){
    int valid=is_same_char_line(buffer,SE_lineNoBlank_[0],SE_lineNoBlank_[1],1,3);
    if(valid){
      if(blockquote_levelN!=blockquote_level) this->close_all();
      else if(this->curr_is_open()) this->curr_close();
      this->append_block(MK::thematic_break,SE_line_,
        SE_lineNoBlank_,blanks_line_);
      this->curr_close();
      return;
    }
  }
  
//################################################################################
//    ATX_heading
//################################################################################
  
  if(blanks_line_[0]<=3 && is_chars(buffer[SE_lineNoBlank_[0]],"#")){
    // level[0]: opening '#' count, level[1]: '#' seen after trailing blanks;
    // blank[0]/blank[1]: blanks before/after the content;
    // start_end: first/last content character.
    // NOTE(review): assumes IntVector2D starts out zeroed - confirm.
    IntVector2D level,start_end,blank;
    level[0]=1;
    for(int i=SE_lineNoBlank_[0]+1;i<SE_lineNoBlank_[1];i++){
      if(is_chars(buffer[i],"#") && start_end[0]==0){
        level[0]++;
      } else if(is_chars(buffer[i],"#") && blank[1]>0){
        level[1]++;
      } else if(is_chars(buffer[i]," \t")){
        if(start_end[0]==0) blank[0]++;
        else blank[1]++;
      } else {
        if(start_end[0]==0) start_end[0]=i;
        start_end[1]=i;
        blank[1]=0;
      }
    }
    // An ATX heading has between one and six '#' followed by a blank.
    if(level[0]<=6 && blank[0]>=1){
      if(blockquote_levelN!=blockquote_level) this->close_all();
      else if(this->curr_is_open()) this->curr_close();
      start_end[1]++;
      MKblocks& block=this->append_block(MK::ATX_heading,SE_line_,start_end,
        blanks_line_);
      block.level_=level[0];
      this->curr_close();
      return;
    }
  }
  
//################################################################################
//    HTML_block
//################################################################################
  
  if(this->give_curr_type()==MK::HTML_block){
    // End patterns, indexed by block.level_-1; they pair up with rexsStart
    // below. The first pattern previously began with a stray '<' in front of
    // \A and could therefore never match.
    const char* rexsEnd[]={
        "(?in)\\A</(script|pre|style)\\s*>?$",
        "(?in)\\A.*-->",
        "(?in)\\A.*\\?>",
        "(?in)\\A.*>",
        "(?in)\\A.*\\]\\]>",
        "(?in)\\A\\s*$",
        "(?in)\\A\\s*$"
      };
    MKblocks& block=blocks_[open_blocks_[end_MTS]];
    block.pos_[1]=SE_line_[1];
    block.posNoBlank_[1]=SE_lineNoBlank_[1];
    block.blanks_[1]=blanks_line_[1];
    
    if(buffer.regexp(SE_lineNoBlank_[0],rexsEnd[block.level_-1])){
      this->curr_close();
    }
    return;
  }
  
  if(blanks_line_[0]<=3 && is_chars(buffer[SE_lineNoBlank_[0]],"<")){
    const char* rexsStart[]={
      "(?in)\\A<(script|pre|style)\\s*>?$",
        "(?in)\\A<!--",
        "(?in)\\A<\\?",
        "(?in)\\A<![A-Z]",
        "(?in)\\A<!\\[CDATA\\[",
        "(?in)\\A</?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|"
        "dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|"
        "h1|h2|h3|h4|h5|h6|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|"
        "noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|"
        "thead|title|tr|track|ul)\\s*/*>?$",
        "(?in)\\A</?[-\\w]+[^>]*>$"
      };
    // level is the 1-based index of the first start pattern that matches.
    int level=0;
    for(int i=0;i<7;i++){
      if(buffer.regexp(SE_lineNoBlank_[0],rexsStart[i])){
        level=i+1;
        break;
      }
    }
    // The last pattern only starts a block when no block is currently open.
    if((level>0 && level<7) || (level==7 && this->curr_is_open()==0)){
      if(blockquote_levelN!=blockquote_level) this->close_all();
      else if(this->curr_is_open()) this->curr_close();
      MKblocks& block=this->append_block(MK::HTML_block,SE_line_,
        SE_lineNoBlank_,blanks_line_);
      block.level_=level;
      return;
    }
  }
  
//################################################################################
//    link_reference_definition
//################################################################################
  
  if(!this->curr_is_open() && blanks_line_[0]<=3  &&
    is_chars(buffer[SE_lineNoBlank_[0]],"[")){
    // c walks through label, ':', destination and optional title; it becomes
    // 0 as soon as one part is invalid.
    int c=link_label_num(buffer,SE_lineNoBlank_[0]);
    int c_noblank=0;
    if(c){
      // link_label_num() returns the index of the closing ']'; the ':' has
      // to follow it directly.
      if(c+1<buffer.num() && buffer[c+1]==':') c+=2;
      else c=0;
    }
    if(c){
      c=link_destination_num(buffer,c);
    }
    if(c){
      // A title is accepted only when nothing but blanks follows it on its
      // line; otherwise the definition ends after the destination.
      int c_alt=link_title_num(buffer,c);
      if(c_alt!=0){
        int c_alt2=spaces_num(buffer,c_alt,num_spaces);
        if(c_alt2!=0){
          c_noblank=c_alt;
          c=c_alt2;
        } else {
          c_alt=0;
        }
      }
      if(c_alt==0){
        c_noblank=c;
        c=spaces_num(buffer,c,num_spaces);
      }
    }
    if(c){
      SE_lineNoBlank_[1]=c_noblank;
      SE_line_[1]=c;
      blanks_line_[1]=c-c_noblank;
      this->append_block(MK::link_reference_definition,SE_line_,
        SE_lineNoBlank_,blanks_line_);
      // Tell the caller how far the definition reached.
      pos_=SE_line_[1];
      return;
    }
  }
  
//################################################################################
//    normal paragraph
//################################################################################
  
  if(SE_lineNoBlank_[0]==SE_lineNoBlank_[1]-1){
    if(blockquote_levelN!=blockquote_level) this->close_all();
    else if(this->curr_is_open()) this->curr_close();
  } else if(this->curr_is_open()){
    // Continuation line: extend the open block.
    MKblocks& block=blocks_[open_blocks_[end_MTS]];
    block.pos_[1]=SE_line_[1];
    block.posNoBlank_[1]=SE_lineNoBlank_[1];
    block.blanks_[1]=blanks_line_[1];
  } else {
    if(blockquote_levelN!=blockquote_level) this->close_all();
    this->append_block(MK::paragraph,SE_line_,SE_lineNoBlank_,
      blanks_line_);
  }
}

// Splits buffer into lines and hands each one to create_append_paragraph().
//
// While scanning a line the following members are maintained:
//   SE_line_[0]/[1]        index of the first character / of the terminator
//   SE_lineNoBlank_[0]/[1] index of the first / last non-blank character
//                          (both equal the terminator index on a blank line)
//   blanks_line_[0]/[1]    leading / trailing blank width, a tab counting 4
// A value of -1 marks a position that has not been seen yet on this line.
// "\r\n" is treated as a single terminator.
//
// NOTE(review): a final line that is not terminated by '\n' or '\r' is never
// passed to create_append_paragraph() - confirm whether that is intended.
void MKstate::process_markdown(MyTSchar& buffer)
{
  MKblocks& block=this->append_block(MK::document);
  block.open_=0;
    
  for(int i=0;i<buffer.num();i++){
    // Whatever the character is, it starts the line if nothing did before.
    if(SE_line_[0]==-1){
      SE_line_[0]=i;
    }
    switch(buffer[i]){
      case ' ':
      case '\t':
      {
        const int width=(buffer[i]=='\t') ? 4 : 1;
        // Blanks before the first non-blank are leading, later ones trailing.
        if(SE_lineNoBlank_[0]==-1){
          blanks_line_[0]+=width;
        } else {
          blanks_line_[1]+=width;
        }
      }
      break;
      case '\n':  case '\r':
      {
        if(SE_lineNoBlank_[0]==-1){
          SE_lineNoBlank_[0]=i;
        }
        if(SE_lineNoBlank_[1]==-1){
          SE_lineNoBlank_[1]=i;
        }
        SE_line_[1]=i;
        
        // create_append_paragraph() may move pos_ forward when a construct
        // spans several lines; scanning resumes from there.
        pos_=i;
        this->create_append_paragraph(buffer);
        i=pos_;
        
        // Reset the per-line state. The trailing blank count is cleared as
        // well, so a blank-only line does not inherit the previous line's
        // value.
        SE_line_[0]=SE_line_[1]=-1;
        SE_lineNoBlank_[0]=SE_lineNoBlank_[1]=-1;
        blanks_line_[0]=0;
        blanks_line_[1]=0;

        if(i<buffer.num()-1 && buffer[i]=='\r' && buffer[i+1]=='\n'){
          i++;
        }
      }
      break;
      default:
      {
        if(SE_lineNoBlank_[0]==-1){
          SE_lineNoBlank_[0]=i;
        }
        SE_lineNoBlank_[1]=i;
        // Blanks seen so far were not trailing after all.
        blanks_line_[1]=0;
      }
      break;
    }
  }
}

// Reads a hard-coded Markdown file into memory and runs the block parser on
// it. No output file is written yet.
//
// Returns 0 on success. Terminates the process with exit status 2 when the
// file cannot be opened, measured or read completely.
int process_markdown()
{
  MyTSchar buffer;
  
  const char* fileIn="C:\\Users\\ramsan\\Dropbox\\temporal\\kkdoc\\strengths_in_shells.md";
  
  FILE* fin=fopen(fileIn,"rb");
  if(!fin){
    fprintf(stderr,"file not found\n");
    exit(2);
  }

  // Determine the file size. ftell() reports failure as -1, which must not
  // be converted to size_t unchecked.
  long fileSize=-1;
  if(fseek(fin,0,SEEK_END)==0){
    fileSize=ftell(fin);
  }
  if(fileSize<0){
    fclose(fin);
    fprintf(stderr,"file read incorrectly\n");
    exit(2);
  }
  rewind(fin);
  
  size_t lSize=(size_t)fileSize;
  buffer.set_num(lSize);
  // Nothing to read for an empty file.
  size_t result=0;
  if(lSize>0){
    result=fread(buffer.v(),1,lSize,fin);
  }
  fclose(fin);
  if(result!=lSize){
    fprintf(stderr,"file read incorrectly\n");
    exit(2);
  }
  MKstate mkstate;
  mkstate.process_markdown(buffer);
  return 0;
}